1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 package zip
6 7 import (
8 "bufio"
9 "encoding/binary"
10 "errors"
11 "hash"
12 "hash/crc32"
13 "io"
14 "io/fs"
15 "bytes"
16 "unicode/utf8"
17 )
// errLongName is reported by writeHeader when a FileHeader.Name is longer
// than a 16-bit length field can describe.
var errLongName = errors.New("zip: FileHeader.Name too long")

// errLongExtra is reported by writeHeader when a FileHeader.Extra is longer
// than a 16-bit length field can describe.
var errLongExtra = errors.New("zip: FileHeader.Extra too long")
// Writer implements a zip file writer.
type Writer struct {
	// cw counts every byte written to the archive; NewWriter points it at
	// a bufio.Writer wrapping the caller's io.Writer.
	cw *countWriter
	// dir holds one header per entry added so far; Close writes them out
	// as the central directory.
	dir []*header
	// last is the most recently created file entry, or nil when the most
	// recent entry was a directory. It is closed before the next entry is
	// created and before the central directory is written.
	last *fileWriter
	// closed is set by Close so that a second Close can be rejected.
	closed bool
	// compressors holds compressors registered on this Writer, keyed by
	// method ID; it is consulted before the package-level lookup.
	compressors map[uint16]Compressor
	// comment is the end-of-central-directory comment set by SetComment.
	comment string

	// testHookCloseSizeOffset if non-nil is called with the size
	// and offset of the central directory at Close.
	testHookCloseSizeOffset func(size, offset uint64)
}
// header couples a FileHeader with the bookkeeping the Writer keeps for
// each entry it has written.
type header struct {
	*FileHeader
	// offset is the Writer's byte count at the moment the entry was
	// created, i.e. the position of its local file header.
	offset uint64
	// raw is true for entries created through CreateRaw, whose contents
	// are passed through without compression or CRC computation.
	raw bool
}
43 44 // NewWriter returns a new [Writer] writing a zip file to w.
45 func NewWriter(w io.Writer) *Writer {
46 return &Writer{cw: &countWriter{w: bufio.NewWriter(w)}}
47 }
48 49 // SetOffset sets the offset of the beginning of the zip data within the
50 // underlying writer. It should be used when the zip data is appended to an
51 // existing file, such as a binary executable.
52 // It must be called before any data is written.
53 func (w *Writer) SetOffset(n int64) {
54 if w.cw.count != 0 {
55 panic("zip: SetOffset called after data was written")
56 }
57 w.cw.count = n
58 }
59 60 // Flush flushes any buffered data to the underlying writer.
61 // Calling Flush is not normally necessary; calling Close is sufficient.
62 func (w *Writer) Flush() error {
63 return w.cw.w.(*bufio.Writer).Flush()
64 }
65 66 // SetComment sets the end-of-central-directory comment field.
67 // It can only be called before [Writer.Close].
68 func (w *Writer) SetComment(comment string) error {
69 if len(comment) > uint16max {
70 return errors.New("zip: Writer.Comment too long")
71 }
72 w.comment = comment
73 return nil
74 }
// Close finishes writing the zip file by writing the central directory.
// It does not close the underlying writer.
//
// Close first closes the last file entry if it is still open, then writes
// one central directory header per entry, followed by a zip64
// end-of-central-directory record and locator when the entry count, the
// directory size, or the directory offset reaches the limit of the classic
// fields, then the classic end-of-central-directory record with the archive
// comment, and finally flushes the buffered output. Once the central
// directory has been started, a further call to Close returns an error.
func (w *Writer) Close() error {
	// Finish the entry that is still being written, if any, so that its
	// sizes and CRC are final before they go into the directory.
	if w.last != nil && !w.last.closed {
		if err := w.last.close(); err != nil {
			return err
		}
		w.last = nil
	}
	if w.closed {
		return errors.New("zip: writer closed twice")
	}
	w.closed = true

	// write central directory
	start := w.cw.count
	for _, h := range w.dir {
		var buf [directoryHeaderLen]byte
		b := writeBuf(buf[:])
		b.uint32(uint32(directoryHeaderSignature))
		b.uint16(h.CreatorVersion)
		b.uint16(h.ReaderVersion)
		b.uint16(h.Flags)
		b.uint16(h.Method)
		b.uint16(h.ModifiedTime)
		b.uint16(h.ModifiedDate)
		b.uint32(h.CRC32)
		if h.isZip64() || h.offset >= uint32max {
			// the file needs a zip64 header. store maxint in both
			// 32 bit size fields (and offset later) to signal that the
			// zip64 extra header should be used.
			b.uint32(uint32max) // compressed size
			b.uint32(uint32max) // uncompressed size

			// append a zip64 extra block to Extra
			// (this buf intentionally shadows the directory header buffer
			// for the duration of this branch only)
			var buf [28]byte // 2x uint16 + 3x uint64
			eb := writeBuf(buf[:])
			eb.uint16(zip64ExtraID)
			eb.uint16(24) // size = 3x uint64
			eb.uint64(h.UncompressedSize64)
			eb.uint64(h.CompressedSize64)
			eb.uint64(h.offset)
			h.Extra = append(h.Extra, buf[:]...)
		} else {
			b.uint32(h.CompressedSize)
			b.uint32(h.UncompressedSize)
		}

		// Lengths of the three variable-size fields that follow the
		// fixed-size header; Extra may have grown just above.
		b.uint16(uint16(len(h.Name)))
		b.uint16(uint16(len(h.Extra)))
		b.uint16(uint16(len(h.Comment)))
		b = b[4:] // skip disk number start and internal file attr (2x uint16)
		b.uint32(h.ExternalAttrs)
		// An offset that does not fit in 32 bits is replaced by the
		// sentinel; the real value lives in the zip64 extra block.
		if h.offset > uint32max {
			b.uint32(uint32max)
		} else {
			b.uint32(uint32(h.offset))
		}
		if _, err := w.cw.Write(buf[:]); err != nil {
			return err
		}
		if _, err := io.WriteString(w.cw, h.Name); err != nil {
			return err
		}
		if _, err := w.cw.Write(h.Extra); err != nil {
			return err
		}
		if _, err := io.WriteString(w.cw, h.Comment); err != nil {
			return err
		}
	}
	end := w.cw.count

	// Totals describing the central directory that was just written.
	records := uint64(len(w.dir))
	size := uint64(end - start)
	offset := uint64(start)

	if f := w.testHookCloseSizeOffset; f != nil {
		f(size, offset)
	}

	// Any total that reaches the classic field's maximum forces the zip64
	// end record and locator to be emitted ahead of the classic end record.
	if records >= uint16max || size >= uint32max || offset >= uint32max {
		var buf [directory64EndLen + directory64LocLen]byte
		b := writeBuf(buf[:])

		// zip64 end of central directory record
		b.uint32(directory64EndSignature)
		b.uint64(directory64EndLen - 12) // length minus signature (uint32) and length fields (uint64)
		b.uint16(zipVersion45)           // version made by
		b.uint16(zipVersion45)           // version needed to extract
		b.uint32(0)                      // number of this disk
		b.uint32(0)                      // number of the disk with the start of the central directory
		b.uint64(records)                // total number of entries in the central directory on this disk
		b.uint64(records)                // total number of entries in the central directory
		b.uint64(size)                   // size of the central directory
		b.uint64(offset)                 // offset of start of central directory with respect to the starting disk number

		// zip64 end of central directory locator
		b.uint32(directory64LocSignature)
		b.uint32(0)           // number of the disk with the start of the zip64 end of central directory
		b.uint64(uint64(end)) // relative offset of the zip64 end of central directory record
		b.uint32(1)           // total number of disks

		if _, err := w.cw.Write(buf[:]); err != nil {
			return err
		}

		// store max values in the regular end record to signal
		// that the zip64 values should be used instead
		records = uint16max
		size = uint32max
		offset = uint32max
	}

	// write end record
	var buf [directoryEndLen]byte
	b := writeBuf(buf[:])
	b.uint32(uint32(directoryEndSignature))
	b = b[4:]                        // skip over disk number and first disk number (2x uint16)
	b.uint16(uint16(records))        // number of entries this disk
	b.uint16(uint16(records))        // number of entries total
	b.uint32(uint32(size))           // size of directory
	b.uint32(uint32(offset))         // start of directory
	b.uint16(uint16(len(w.comment))) // byte size of EOCD comment
	if _, err := w.cw.Write(buf[:]); err != nil {
		return err
	}
	if _, err := io.WriteString(w.cw, w.comment); err != nil {
		return err
	}

	// Push everything still sitting in the buffer to the underlying writer.
	return w.cw.w.(*bufio.Writer).Flush()
}
209 210 // Create adds a file to the zip file using the provided name.
211 // It returns a [Writer] to which the file contents should be written.
212 // The file contents will be compressed using the [Deflate] method.
213 // The name must be a relative path: it must not start with a drive
214 // letter (e.g. C:) or leading slash, and only forward slashes are
215 // allowed. To create a directory instead of a file, add a trailing
216 // slash to the name. Duplicate names will not overwrite previous entries
217 // and are appended to the zip file.
218 // The file's contents must be written to the [io.Writer] before the next
219 // call to [Writer.Create], [Writer.CreateHeader], or [Writer.Close].
220 func (w *Writer) Create(name string) (io.Writer, error) {
221 header := &FileHeader{
222 Name: name,
223 Method: Deflate,
224 }
225 return w.CreateHeader(header)
226 }
// detectUTF8 reports whether s is a valid UTF-8 string, and whether the string
// must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII,
// or any other common encoding).
//
// As soon as an invalid sequence is found both results are false, even if
// an earlier rune already required UTF-8.
func detectUTF8(s string) (valid, require bool) {
	for rest := s; len(rest) > 0; {
		r, n := utf8.DecodeRuneInString(rest)
		rest = rest[n:]

		// Officially, ZIP uses CP-437, but many readers use the system's
		// local character encoding. Most encodings agree with CP-437 on
		// the printable range 0x20..0x7d. The backslash (0x5c) is excluded
		// from that safe range as well, since EUC-KR and Shift-JIS replace
		// it (and 0x7e) with localized currency and overline characters.
		if r >= 0x20 && r <= 0x7d && r != 0x5c {
			continue
		}

		// A RuneError of width 1 is a decoding failure, as opposed to a
		// correctly encoded U+FFFD.
		if !utf8.ValidRune(r) || (r == utf8.RuneError && n == 1) {
			return false, false
		}
		require = true
	}
	return true, require
}
250 251 // prepare performs the bookkeeping operations required at the start of
252 // CreateHeader and CreateRaw.
253 func (w *Writer) prepare(fh *FileHeader) error {
254 if w.last != nil && !w.last.closed {
255 if err := w.last.close(); err != nil {
256 return err
257 }
258 }
259 if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh {
260 // See https://golang.org/issue/11144 confusion.
261 return errors.New("archive/zip: invalid duplicate FileHeader")
262 }
263 return nil
264 }
265 266 // CreateHeader adds a file to the zip archive using the provided [FileHeader]
267 // for the file metadata. [Writer] takes ownership of fh and may mutate
268 // its fields. The caller must not modify fh after calling [Writer.CreateHeader].
269 //
270 // This returns a [Writer] to which the file contents should be written.
271 // The file's contents must be written to the io.Writer before the next
272 // call to [Writer.Create], [Writer.CreateHeader], [Writer.CreateRaw], or [Writer.Close].
273 func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) {
274 if err := w.prepare(fh); err != nil {
275 return nil, err
276 }
277 278 // The ZIP format has a sad state of affairs regarding character encoding.
279 // Officially, the name and comment fields are supposed to be encoded
280 // in CP-437 (which is mostly compatible with ASCII), unless the UTF-8
281 // flag bit is set. However, there are several problems:
282 //
283 // * Many ZIP readers still do not support UTF-8.
284 // * If the UTF-8 flag is cleared, several readers simply interpret the
285 // name and comment fields as whatever the local system encoding is.
286 //
287 // In order to avoid breaking readers without UTF-8 support,
288 // we avoid setting the UTF-8 flag if the strings are CP-437 compatible.
289 // However, if the strings require multibyte UTF-8 encoding and is a
290 // valid UTF-8 string, then we set the UTF-8 bit.
291 //
292 // For the case, where the user explicitly wants to specify the encoding
293 // as UTF-8, they will need to set the flag bit themselves.
294 utf8Valid1, utf8Require1 := detectUTF8(fh.Name)
295 utf8Valid2, utf8Require2 := detectUTF8(fh.Comment)
296 switch {
297 case fh.NonUTF8:
298 fh.Flags &^= 0x800
299 case (utf8Require1 || utf8Require2) && (utf8Valid1 && utf8Valid2):
300 fh.Flags |= 0x800
301 }
302 303 fh.CreatorVersion = fh.CreatorVersion&0xff00 | zipVersion20 // preserve compatibility byte
304 fh.ReaderVersion = zipVersion20
305 306 // If Modified is set, this takes precedence over MS-DOS timestamp fields.
307 if !fh.Modified.IsZero() {
308 // Contrary to the FileHeader.SetModTime method, we intentionally
309 // do not convert to UTC, because we assume the user intends to encode
310 // the date using the specified timezone. A user may want this control
311 // because many legacy ZIP readers interpret the timestamp according
312 // to the local timezone.
313 //
314 // The timezone is only non-UTC if a user directly sets the Modified
315 // field directly themselves. All other approaches sets UTC.
316 fh.ModifiedDate, fh.ModifiedTime = timeToMsDosTime(fh.Modified)
317 318 // Use "extended timestamp" format since this is what Info-ZIP uses.
319 // Nearly every major ZIP implementation uses a different format,
320 // but at least most seem to be able to understand the other formats.
321 //
322 // This format happens to be identical for both local and central header
323 // if modification time is the only timestamp being encoded.
324 var mbuf [9]byte // 2*SizeOf(uint16) + SizeOf(uint8) + SizeOf(uint32)
325 mt := uint32(fh.Modified.Unix())
326 eb := writeBuf(mbuf[:])
327 eb.uint16(extTimeExtraID)
328 eb.uint16(5) // Size: SizeOf(uint8) + SizeOf(uint32)
329 eb.uint8(1) // Flags: ModTime
330 eb.uint32(mt) // ModTime
331 fh.Extra = append(fh.Extra, mbuf[:]...)
332 }
333 334 var (
335 ow io.Writer
336 fw *fileWriter
337 )
338 h := &header{
339 FileHeader: fh,
340 offset: uint64(w.cw.count),
341 }
342 343 if bytes.HasSuffix(fh.Name, "/") {
344 // Set the compression method to Store to ensure data length is truly zero,
345 // which the writeHeader method always encodes for the size fields.
346 // This is necessary as most compression formats have non-zero lengths
347 // even when compressing an empty string.
348 fh.Method = Store
349 fh.Flags &^= 0x8 // we will not write a data descriptor
350 351 // Explicitly clear sizes as they have no meaning for directories.
352 fh.CompressedSize = 0
353 fh.CompressedSize64 = 0
354 fh.UncompressedSize = 0
355 fh.UncompressedSize64 = 0
356 357 ow = dirWriter{}
358 } else {
359 fh.Flags |= 0x8 // we will write a data descriptor
360 361 fw = &fileWriter{
362 zipw: w.cw,
363 compCount: &countWriter{w: w.cw},
364 crc32: crc32.NewIEEE(),
365 }
366 comp := w.compressor(fh.Method)
367 if comp == nil {
368 return nil, ErrAlgorithm
369 }
370 var err error
371 fw.comp, err = comp(fw.compCount)
372 if err != nil {
373 return nil, err
374 }
375 fw.rawCount = &countWriter{w: fw.comp}
376 fw.header = h
377 ow = fw
378 }
379 w.dir = append(w.dir, h)
380 if err := writeHeader(w.cw, h); err != nil {
381 return nil, err
382 }
383 // If we're creating a directory, fw is nil.
384 w.last = fw
385 return ow, nil
386 }
387 388 func writeHeader(w io.Writer, h *header) error {
389 const maxUint16 = 1<<16 - 1
390 if len(h.Name) > maxUint16 {
391 return errLongName
392 }
393 if len(h.Extra) > maxUint16 {
394 return errLongExtra
395 }
396 397 var buf [fileHeaderLen]byte
398 b := writeBuf(buf[:])
399 b.uint32(uint32(fileHeaderSignature))
400 b.uint16(h.ReaderVersion)
401 b.uint16(h.Flags)
402 b.uint16(h.Method)
403 b.uint16(h.ModifiedTime)
404 b.uint16(h.ModifiedDate)
405 // In raw mode (caller does the compression), the values are either
406 // written here or in the trailing data descriptor based on the header
407 // flags.
408 if h.raw && !h.hasDataDescriptor() {
409 b.uint32(h.CRC32)
410 b.uint32(uint32(min(h.CompressedSize64, uint32max)))
411 b.uint32(uint32(min(h.UncompressedSize64, uint32max)))
412 } else {
413 // When this package handle the compression, these values are
414 // always written to the trailing data descriptor.
415 b.uint32(0) // crc32
416 b.uint32(0) // compressed size
417 b.uint32(0) // uncompressed size
418 }
419 b.uint16(uint16(len(h.Name)))
420 b.uint16(uint16(len(h.Extra)))
421 if _, err := w.Write(buf[:]); err != nil {
422 return err
423 }
424 if _, err := io.WriteString(w, h.Name); err != nil {
425 return err
426 }
427 _, err := w.Write(h.Extra)
428 return err
429 }
430 431 // CreateRaw adds a file to the zip archive using the provided [FileHeader] and
432 // returns a [Writer] to which the file contents should be written. The file's
433 // contents must be written to the io.Writer before the next call to [Writer.Create],
434 // [Writer.CreateHeader], [Writer.CreateRaw], or [Writer.Close].
435 //
436 // In contrast to [Writer.CreateHeader], the bytes passed to Writer are not compressed.
437 //
438 // CreateRaw's argument is stored in w. If the argument is a pointer to the embedded
439 // [FileHeader] in a [File] obtained from a [Reader] created from in-memory data,
440 // then w will refer to all of that memory.
441 func (w *Writer) CreateRaw(fh *FileHeader) (io.Writer, error) {
442 if err := w.prepare(fh); err != nil {
443 return nil, err
444 }
445 446 fh.CompressedSize = uint32(min(fh.CompressedSize64, uint32max))
447 fh.UncompressedSize = uint32(min(fh.UncompressedSize64, uint32max))
448 449 h := &header{
450 FileHeader: fh,
451 offset: uint64(w.cw.count),
452 raw: true,
453 }
454 w.dir = append(w.dir, h)
455 if err := writeHeader(w.cw, h); err != nil {
456 return nil, err
457 }
458 459 if bytes.HasSuffix(fh.Name, "/") {
460 w.last = nil
461 return dirWriter{}, nil
462 }
463 464 fw := &fileWriter{
465 header: h,
466 zipw: w.cw,
467 }
468 w.last = fw
469 return fw, nil
470 }
471 472 // Copy copies the file f (obtained from a [Reader]) into w. It copies the raw
473 // form directly bypassing decompression, compression, and validation.
474 func (w *Writer) Copy(f *File) error {
475 r, err := f.OpenRaw()
476 if err != nil {
477 return err
478 }
479 // Copy the FileHeader so w doesn't store a pointer to the data
480 // of f's entire archive. See #65499.
481 fh := f.FileHeader
482 fw, err := w.CreateRaw(&fh)
483 if err != nil {
484 return err
485 }
486 _, err = io.Copy(fw, r)
487 return err
488 }
489 490 // RegisterCompressor registers or overrides a custom compressor for a specific
491 // method ID. If a compressor for a given method is not found, [Writer] will
492 // default to looking up the compressor at the package level.
493 func (w *Writer) RegisterCompressor(method uint16, comp Compressor) {
494 if w.compressors == nil {
495 w.compressors = map[uint16]Compressor{}
496 }
497 w.compressors[method] = comp
498 }
499 500 // AddFS adds the files from fs.FS to the archive.
501 // It walks the directory tree starting at the root of the filesystem
502 // adding each file to the zip using deflate while maintaining the directory structure.
503 func (w *Writer) AddFS(fsys fs.FS) error {
504 return fs.WalkDir(fsys, ".", func(name string, d fs.DirEntry, err error) error {
505 if err != nil {
506 return err
507 }
508 if name == "." {
509 return nil
510 }
511 info, err := d.Info()
512 if err != nil {
513 return err
514 }
515 if !d.IsDir() && !info.Mode().IsRegular() {
516 return errors.New("zip: cannot add non-regular file")
517 }
518 h, err := FileInfoHeader(info)
519 if err != nil {
520 return err
521 }
522 h.Name = name
523 if d.IsDir() {
524 h.Name += "/"
525 }
526 h.Method = Deflate
527 fw, err := w.CreateHeader(h)
528 if err != nil {
529 return err
530 }
531 if d.IsDir() {
532 return nil
533 }
534 f, err := fsys.Open(name)
535 if err != nil {
536 return err
537 }
538 defer f.Close()
539 _, err = io.Copy(fw, f)
540 return err
541 })
542 }
543 544 func (w *Writer) compressor(method uint16) Compressor {
545 comp := w.compressors[method]
546 if comp == nil {
547 comp = compressor(method)
548 }
549 return comp
550 }
// dirWriter is the writer handed out for directory entries. It accepts
// empty writes and rejects everything else.
type dirWriter struct{}

// Write reports an error for any non-empty b and never consumes data.
func (dirWriter) Write(b []byte) (int, error) {
	if len(b) > 0 {
		return 0, errors.New("zip: write to directory")
	}
	return 0, nil
}
// fileWriter is the io.Writer handed out for a file entry. For entries
// created by CreateHeader, data flows through rawCount, the compressor and
// compCount before reaching zipw; for raw entries it goes straight to zipw.
type fileWriter struct {
	*header
	// zipw is the archive's output; data descriptors and raw data are
	// written to it directly.
	zipw io.Writer
	// rawCount counts the uncompressed bytes fed into comp.
	rawCount *countWriter
	// comp is the compressor for this entry; it is closed by close.
	comp io.WriteCloser
	// compCount counts the compressed bytes that comp emits.
	compCount *countWriter
	// crc32 accumulates the checksum of the uncompressed data.
	crc32 hash.Hash32
	// closed is set by close; later writes are rejected.
	closed bool
}
570 571 func (w *fileWriter) Write(p []byte) (int, error) {
572 if w.closed {
573 return 0, errors.New("zip: write to closed file")
574 }
575 if w.raw {
576 return w.zipw.Write(p)
577 }
578 w.crc32.Write(p)
579 return w.rawCount.Write(p)
580 }
581 582 func (w *fileWriter) close() error {
583 if w.closed {
584 return errors.New("zip: file closed twice")
585 }
586 w.closed = true
587 if w.raw {
588 return w.writeDataDescriptor()
589 }
590 if err := w.comp.Close(); err != nil {
591 return err
592 }
593 594 // update FileHeader
595 fh := w.header.FileHeader
596 fh.CRC32 = w.crc32.Sum32()
597 fh.CompressedSize64 = uint64(w.compCount.count)
598 fh.UncompressedSize64 = uint64(w.rawCount.count)
599 600 if fh.isZip64() {
601 fh.CompressedSize = uint32max
602 fh.UncompressedSize = uint32max
603 fh.ReaderVersion = zipVersion45 // requires 4.5 - File uses ZIP64 format extensions
604 } else {
605 fh.CompressedSize = uint32(fh.CompressedSize64)
606 fh.UncompressedSize = uint32(fh.UncompressedSize64)
607 }
608 609 return w.writeDataDescriptor()
610 }
611 612 func (w *fileWriter) writeDataDescriptor() error {
613 if !w.hasDataDescriptor() {
614 return nil
615 }
616 // Write data descriptor. This is more complicated than one would
617 // think, see e.g. comments in zipfile.c:putextended() and
618 // https://bugs.openjdk.org/browse/JDK-7073588.
619 // The approach here is to write 8 byte sizes if needed without
620 // adding a zip64 extra in the local header (too late anyway).
621 var buf []byte
622 if w.isZip64() {
623 buf = []byte{:dataDescriptor64Len}
624 } else {
625 buf = []byte{:dataDescriptorLen}
626 }
627 b := writeBuf(buf)
628 b.uint32(dataDescriptorSignature) // de-facto standard, required by OS X
629 b.uint32(w.CRC32)
630 if w.isZip64() {
631 b.uint64(w.CompressedSize64)
632 b.uint64(w.UncompressedSize64)
633 } else {
634 b.uint32(w.CompressedSize)
635 b.uint32(w.UncompressedSize)
636 }
637 _, err := w.zipw.Write(buf)
638 return err
639 }
// countWriter forwards writes to w and keeps a running total of the bytes
// that w reported as written.
type countWriter struct {
	w     io.Writer
	count int64
}

// Write passes p to the wrapped writer and adds the number of bytes it
// accepted to count, even when the write also returns an error.
func (w *countWriter) Write(p []byte) (int, error) {
	written, err := w.w.Write(p)
	w.count = w.count + int64(written)
	return written, err
}
// nopCloser adds a Close method that does nothing to an io.Writer.
type nopCloser struct {
	io.Writer
}

// Close always succeeds and leaves the embedded writer untouched.
func (nopCloser) Close() error { return nil }
// writeBuf is a cursor over a byte slice: each method stores a
// little-endian value at the front and then advances the slice past it.
// The methods panic if the remaining slice is too short.
type writeBuf []byte

// uint8 stores v in the first byte and advances by one.
func (b *writeBuf) uint8(v uint8) {
	dst := *b
	dst[0] = v
	*b = dst[1:]
}

// uint16 stores v in the first two bytes and advances by two.
func (b *writeBuf) uint16(v uint16) {
	dst := *b
	binary.LittleEndian.PutUint16(dst, v)
	*b = dst[2:]
}

// uint32 stores v in the first four bytes and advances by four.
func (b *writeBuf) uint32(v uint32) {
	dst := *b
	binary.LittleEndian.PutUint32(dst, v)
	*b = dst[4:]
}

// uint64 stores v in the first eight bytes and advances by eight.
func (b *writeBuf) uint64(v uint64) {
	dst := *b
	binary.LittleEndian.PutUint64(dst, v)
	*b = dst[8:]
}
681