1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 /*
6 Package zip provides support for reading and writing ZIP archives.
7 8 See the [ZIP specification] for details.
9 10 This package does not support disk spanning.
11 12 A note about ZIP64:
13 14 To be backwards compatible the FileHeader has both 32 and 64 bit Size
15 fields. The 64 bit fields will always contain the correct value and
16 for normal archives both fields will be the same. For files requiring
17 the ZIP64 format the 32 bit fields will be 0xffffffff and the 64 bit
18 fields must be used instead.
19 20 [ZIP specification]: https://support.pkware.com/pkzip/appnote
21 */
22 package zip
23 24 import (
25 "io/fs"
26 "path"
27 "time"
28 )
// Compression methods.
//
// These are the values FileHeader.Method is documented to hold; a zero
// Method means Store.
const (
	Store   uint16 = 0 // no compression
	Deflate uint16 = 8 // DEFLATE compressed
)
const (
	// Record signatures. The two low-order bytes of every value are
	// 0x50 0x4b, the ASCII letters "PK".
	fileHeaderSignature      = 0x04034b50
	directoryHeaderSignature = 0x02014b50
	directoryEndSignature    = 0x06054b50
	directory64LocSignature  = 0x07064b50
	directory64EndSignature  = 0x06064b50
	dataDescriptorSignature  = 0x08074b50 // de-facto standard; required by OS X Finder

	// Lengths, in bytes, of the fixed-size part of each record. The
	// trailing comments name the variable-length data that follows it.
	fileHeaderLen       = 30 // + filename + extra
	directoryHeaderLen  = 46 // + filename + extra + comment
	directoryEndLen     = 22 // + comment
	dataDescriptorLen   = 16 // four uint32: descriptor signature, crc32, compressed size, size
	dataDescriptor64Len = 24 // two uint32: signature, crc32 | two uint64: compressed size, size
	directory64LocLen   = 20 //
	directory64EndLen   = 56 // + extra

	// Constants for the first byte in CreatorVersion.
	// FileHeader.Mode compares them against CreatorVersion >> 8.
	creatorFAT    = 0
	creatorUnix   = 3
	creatorNTFS   = 11
	creatorVFAT   = 14
	creatorMacOSX = 19

	// Version numbers.
	zipVersion20 = 20 // 2.0
	zipVersion45 = 45 // 4.5 (reads and writes zip64 archives)

	// Limits for non zip64 files.
	uint16max = (1 << 16) - 1
	uint32max = (1 << 32) - 1

	// Extra header IDs.
	//
	// IDs 0..31 are reserved for official use by PKWARE.
	// IDs above that range are defined by third-party vendors.
	// Since ZIP lacked high precision timestamps (nor an official specification
	// of the timezone used for the date fields), many competing extra fields
	// have been invented. Pervasive use effectively makes them "official".
	//
	// See http://mdfs.net/Docs/Comp/Archiving/Zip/ExtraField
	zip64ExtraID       = 0x0001 // Zip64 extended information
	ntfsExtraID        = 0x000a // NTFS
	unixExtraID        = 0x000d // UNIX
	extTimeExtraID     = 0x5455 // Extended timestamp
	infoZipUnixExtraID = 0x5855 // Info-ZIP Unix extension
)
// FileHeader describes a file within a ZIP file.
// See the [ZIP specification] for details.
//
// [ZIP specification]: https://support.pkware.com/pkzip/appnote
type FileHeader struct {
	// Name is the name of the file.
	//
	// It must be a relative path, not start with a drive letter (such as "C:"),
	// and must use forward slashes instead of back slashes. A trailing slash
	// indicates that this file is a directory and should have no data.
	Name string

	// Comment is any arbitrary user-defined string shorter than 64KiB.
	Comment string

	// NonUTF8 indicates that Name and Comment are not encoded in UTF-8.
	//
	// By specification, the only other encoding permitted should be CP-437,
	// but historically many ZIP readers interpret Name and Comment as whatever
	// the system's local character encoding happens to be.
	//
	// This flag should only be set if the user intends to encode a non-portable
	// ZIP file for a specific localized region. Otherwise, the Writer
	// automatically sets the ZIP format's UTF-8 flag for valid UTF-8 strings.
	NonUTF8 bool

	// CreatorVersion identifies what wrote the entry. Mode and SetMode
	// treat its upper byte (CreatorVersion >> 8) as the creating system,
	// compared against the creator* constants; SetMode leaves the lower
	// byte untouched.
	CreatorVersion uint16

	// ReaderVersion is not interpreted in this file.
	// NOTE(review): presumably the "version needed to extract" field of
	// the ZIP specification; confirm against the reader and writer.
	ReaderVersion uint16

	// Flags holds the entry's flag bits. Bit 0x8 marks an entry that has
	// a data descriptor (see hasDataDescriptor).
	Flags uint16

	// Method is the compression method. If zero, Store is used.
	Method uint16

	// Modified is the modified time of the file.
	//
	// When reading, an extended timestamp is preferred over the legacy MS-DOS
	// date field, and the offset between the times is used as the timezone.
	// If only the MS-DOS date is present, the timezone is assumed to be UTC.
	//
	// When writing, an extended timestamp (which is timezone-agnostic) is
	// always emitted. The legacy MS-DOS date field is encoded according to the
	// location of the Modified time.
	Modified time.Time

	// ModifiedTime is an MS-DOS-encoded time.
	//
	// Deprecated: Use Modified instead.
	ModifiedTime uint16

	// ModifiedDate is an MS-DOS-encoded date.
	//
	// Deprecated: Use Modified instead.
	ModifiedDate uint16

	// CRC32 is the CRC32 checksum of the file content.
	CRC32 uint32

	// CompressedSize is the compressed size of the file in bytes.
	// If either the uncompressed or compressed size of the file
	// does not fit in 32 bits, CompressedSize is set to ^uint32(0).
	//
	// Deprecated: Use CompressedSize64 instead.
	CompressedSize uint32

	// UncompressedSize is the uncompressed size of the file in bytes.
	// If either the uncompressed or compressed size of the file
	// does not fit in 32 bits, UncompressedSize is set to ^uint32(0).
	//
	// Deprecated: Use UncompressedSize64 instead.
	UncompressedSize uint32

	// CompressedSize64 is the compressed size of the file in bytes.
	CompressedSize64 uint64

	// UncompressedSize64 is the uncompressed size of the file in bytes.
	UncompressedSize64 uint64

	// Extra is not interpreted in this file.
	// NOTE(review): presumably the raw extra-field bytes, tagged with the
	// extra header IDs declared in this package; confirm.
	Extra []byte

	// ExternalAttrs carries host-specific file attributes. Mode reads the
	// upper 16 bits as a Unix mode when the creator is Unix or macOS, and
	// reads the value as MS-DOS attribute bits for FAT, VFAT and NTFS.
	ExternalAttrs uint32 // Meaning depends on CreatorVersion
}
162 163 // FileInfo returns an fs.FileInfo for the [FileHeader].
164 func (h *FileHeader) FileInfo() fs.FileInfo {
165 return headerFileInfo{h}
166 }
// headerFileInfo implements [fs.FileInfo].
// Its methods read through fh on every call instead of caching values.
type headerFileInfo struct {
	fh *FileHeader // header being described; also what Sys returns
}
172 173 func (fi headerFileInfo) Name() []byte { return path.Base(fi.fh.Name) }
174 func (fi headerFileInfo) Size() int64 {
175 if fi.fh.UncompressedSize64 > 0 {
176 return int64(fi.fh.UncompressedSize64)
177 }
178 return int64(fi.fh.UncompressedSize)
179 }
180 func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() }
181 func (fi headerFileInfo) ModTime() time.Time {
182 if fi.fh.Modified.IsZero() {
183 return fi.fh.ModTime()
184 }
185 return fi.fh.Modified.UTC()
186 }
187 func (fi headerFileInfo) Mode() fs.FileMode { return fi.fh.Mode() }
188 func (fi headerFileInfo) Type() fs.FileMode { return fi.fh.Mode().Type() }
189 func (fi headerFileInfo) Sys() any { return fi.fh }
190 191 func (fi headerFileInfo) Info() (fs.FileInfo, error) { return fi, nil }
192 193 func (fi headerFileInfo) String() string {
194 return fs.FormatFileInfo(fi)
195 }
196 197 // FileInfoHeader creates a partially-populated [FileHeader] from an
198 // fs.FileInfo.
199 // Because fs.FileInfo's Name method returns only the base name of
200 // the file it describes, it may be necessary to modify the Name field
201 // of the returned header to provide the full path name of the file.
202 // If compression is desired, callers should set the FileHeader.Method
203 // field; it is unset by default.
204 func FileInfoHeader(fi fs.FileInfo) (*FileHeader, error) {
205 size := fi.Size()
206 fh := &FileHeader{
207 Name: fi.Name(),
208 UncompressedSize64: uint64(size),
209 }
210 fh.SetModTime(fi.ModTime())
211 fh.SetMode(fi.Mode())
212 if fh.UncompressedSize64 > uint32max {
213 fh.UncompressedSize = uint32max
214 } else {
215 fh.UncompressedSize = uint32(fh.UncompressedSize64)
216 }
217 return fh, nil
218 }
// directoryEnd holds the values describing where an archive's directory
// lives and the archive-level comment.
// NOTE(review): presumably filled from the record identified by
// directoryEndSignature; confirm against the reader.
//
// The comment is kept as a string, in line with how FileHeader documents
// its own Comment ("any arbitrary user-defined string").
type directoryEnd struct {
	diskNbr            uint32 // unused
	dirDiskNbr         uint32 // unused
	dirRecordsThisDisk uint64 // unused
	directoryRecords   uint64
	directorySize      uint64
	directoryOffset    uint64 // relative to file
	commentLen         uint16
	comment            string
}
// timeZone builds an unnamed fixed *time.Location for the given UTC offset.
// The offset is first rounded to the nearest quarter hour; if the rounded
// value falls outside -12h..+14h it is treated as non-sensible and replaced
// by zero.
func timeZone(offset time.Duration) *time.Location {
	const (
		earliest    = -12 * time.Hour  // E.g., Baker island at -12:00
		latest      = +14 * time.Hour  // E.g., Line island at +14:00
		granularity = 15 * time.Minute // E.g., Nepal at +5:45
	)

	rounded := offset.Round(granularity)
	inRange := earliest <= rounded && rounded <= latest
	if !inRange {
		rounded = 0
	}

	seconds := int(rounded / time.Second)
	return time.FixedZone("", seconds)
}
// msDosTimeToTime decodes an MS-DOS date and time pair into a time.Time
// in UTC. The encoding has a resolution of two seconds.
// See: https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-dosdatetimetofiletime
func msDosTimeToTime(dosDate, dosTime uint16) time.Time {
	// Date layout: bits 0-4 day of month, 5-8 month, 9-15 years since 1980.
	year := int(dosDate>>9) + 1980
	month := time.Month(dosDate >> 5 & 0xf)
	day := int(dosDate & 0x1f)

	// Time layout: bits 0-4 second/2, 5-10 minute, 11-15 hour.
	hour := int(dosTime >> 11)
	minute := int(dosTime >> 5 & 0x3f)
	second := 2 * int(dosTime&0x1f)

	// Out-of-range fields are passed through unchanged; time.Date
	// normalizes them.
	return time.Date(year, month, day, hour, minute, second, 0, time.UTC)
}
// timeToMsDosTime encodes t as an MS-DOS date and time pair, using the
// calendar fields of t in its own location. The encoding has a resolution
// of two seconds, so an odd second is rounded down.
// See: https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-filetimetodosdatetime
func timeToMsDosTime(t time.Time) (fDate uint16, fTime uint16) {
	year, month, day := t.Date()
	hour, minute, second := t.Clock()

	// Date layout: bits 0-4 day of month, 5-8 month, 9-15 years since 1980.
	packedDate := day + int(month)<<5 + (year-1980)<<9
	// Time layout: bits 0-4 second/2, 5-10 minute, 11-15 hour.
	packedTime := second/2 + minute<<5 + hour<<11

	return uint16(packedDate), uint16(packedTime)
}
274 275 // ModTime returns the modification time in UTC using the legacy
276 // [ModifiedDate] and [ModifiedTime] fields.
277 //
278 // Deprecated: Use [Modified] instead.
279 func (h *FileHeader) ModTime() time.Time {
280 return msDosTimeToTime(h.ModifiedDate, h.ModifiedTime)
281 }
282 283 // SetModTime sets the [Modified], [ModifiedTime], and [ModifiedDate] fields
284 // to the given time in UTC.
285 //
286 // Deprecated: Use [Modified] instead.
287 func (h *FileHeader) SetModTime(t time.Time) {
288 t = t.UTC() // Convert to UTC for compatibility
289 h.Modified = t
290 h.ModifiedDate, h.ModifiedTime = timeToMsDosTime(t)
291 }
const (
	// Unix constants. The specification doesn't mention them,
	// but these seem to be the values agreed on by tools.
	s_IFMT   = 0xf000 // mask selecting the file-type bits
	s_IFSOCK = 0xc000 // socket (fs.ModeSocket)
	s_IFLNK  = 0xa000 // symbolic link (fs.ModeSymlink)
	s_IFREG  = 0x8000 // regular file (no fs type bit)
	s_IFBLK  = 0x6000 // block device (fs.ModeDevice)
	s_IFDIR  = 0x4000 // directory (fs.ModeDir)
	s_IFCHR  = 0x2000 // character device (fs.ModeDevice | fs.ModeCharDevice)
	s_IFIFO  = 0x1000 // named pipe (fs.ModeNamedPipe)
	s_ISUID  = 0x800  // setuid (fs.ModeSetuid)
	s_ISGID  = 0x400  // setgid (fs.ModeSetgid)
	s_ISVTX  = 0x200  // sticky (fs.ModeSticky)

	// MS-DOS attribute bits, tested against ExternalAttrs.
	msdosDir      = 0x10 // directory
	msdosReadOnly = 0x01 // read-only; the write permission bits are cleared
)
311 312 // Mode returns the permission and mode bits for the [FileHeader].
313 func (h *FileHeader) Mode() (mode fs.FileMode) {
314 switch h.CreatorVersion >> 8 {
315 case creatorUnix, creatorMacOSX:
316 mode = unixModeToFileMode(h.ExternalAttrs >> 16)
317 case creatorNTFS, creatorVFAT, creatorFAT:
318 mode = msdosModeToFileMode(h.ExternalAttrs)
319 }
320 if len(h.Name) > 0 && h.Name[len(h.Name)-1] == '/' {
321 mode |= fs.ModeDir
322 }
323 return mode
324 }
325 326 // SetMode changes the permission and mode bits for the [FileHeader].
327 func (h *FileHeader) SetMode(mode fs.FileMode) {
328 h.CreatorVersion = h.CreatorVersion&0xff | creatorUnix<<8
329 h.ExternalAttrs = fileModeToUnixMode(mode) << 16
330 331 // set MSDOS attributes too, as the original zip does.
332 if mode&fs.ModeDir != 0 {
333 h.ExternalAttrs |= msdosDir
334 }
335 if mode&0200 == 0 {
336 h.ExternalAttrs |= msdosReadOnly
337 }
338 }
339 340 // isZip64 reports whether the file size exceeds the 32 bit limit
341 func (h *FileHeader) isZip64() bool {
342 return h.CompressedSize64 >= uint32max || h.UncompressedSize64 >= uint32max
343 }
344 345 func (h *FileHeader) hasDataDescriptor() bool {
346 return h.Flags&0x8 != 0
347 }
348 349 func msdosModeToFileMode(m uint32) (mode fs.FileMode) {
350 if m&msdosDir != 0 {
351 mode = fs.ModeDir | 0777
352 } else {
353 mode = 0666
354 }
355 if m&msdosReadOnly != 0 {
356 mode &^= 0222
357 }
358 return mode
359 }
360 361 func fileModeToUnixMode(mode fs.FileMode) uint32 {
362 var m uint32
363 switch mode & fs.ModeType {
364 default:
365 m = s_IFREG
366 case fs.ModeDir:
367 m = s_IFDIR
368 case fs.ModeSymlink:
369 m = s_IFLNK
370 case fs.ModeNamedPipe:
371 m = s_IFIFO
372 case fs.ModeSocket:
373 m = s_IFSOCK
374 case fs.ModeDevice:
375 m = s_IFBLK
376 case fs.ModeDevice | fs.ModeCharDevice:
377 m = s_IFCHR
378 }
379 if mode&fs.ModeSetuid != 0 {
380 m |= s_ISUID
381 }
382 if mode&fs.ModeSetgid != 0 {
383 m |= s_ISGID
384 }
385 if mode&fs.ModeSticky != 0 {
386 m |= s_ISVTX
387 }
388 return m | uint32(mode&0777)
389 }
390 391 func unixModeToFileMode(m uint32) fs.FileMode {
392 mode := fs.FileMode(m & 0777)
393 switch m & s_IFMT {
394 case s_IFBLK:
395 mode |= fs.ModeDevice
396 case s_IFCHR:
397 mode |= fs.ModeDevice | fs.ModeCharDevice
398 case s_IFDIR:
399 mode |= fs.ModeDir
400 case s_IFIFO:
401 mode |= fs.ModeNamedPipe
402 case s_IFLNK:
403 mode |= fs.ModeSymlink
404 case s_IFREG:
405 // nothing to do
406 case s_IFSOCK:
407 mode |= fs.ModeSocket
408 }
409 if m&s_ISGID != 0 {
410 mode |= fs.ModeSetgid
411 }
412 if m&s_ISUID != 0 {
413 mode |= fs.ModeSetuid
414 }
415 if m&s_ISVTX != 0 {
416 mode |= fs.ModeSticky
417 }
418 return mode
419 }
420