001/*
002 * Copyright (c) 2004-2010, Kohsuke Kawaguchi
003 * All rights reserved.
004 * 
005 * Copyright (c) 2012, Martin Schroeder, Intel Mobile Communications GmbH
006 *
007 * Redistribution and use in source and binary forms, with or without modification, are permitted provided
008 * that the following conditions are met:
009 *
010 *     * Redistributions of source code must retain the above copyright notice, this list of
011 *       conditions and the following disclaimer.
012 *     * Redistributions in binary form must reproduce the above copyright notice, this list of
013 *       conditions and the following disclaimer in the documentation and/or other materials
014 *       provided with the distribution.
015 *
016 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
017 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
018 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
019 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
020 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
021 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
022 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
023 * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
024 */
025
026package org.kohsuke.stapler.framework.io;
027
028import java.nio.file.Files;
029import java.nio.file.StandardOpenOption;
030import org.apache.commons.io.output.CountingOutputStream;
031import org.kohsuke.stapler.StaplerRequest;
032import org.kohsuke.stapler.StaplerResponse;
033
034import javax.servlet.http.HttpServletResponse;
035import java.io.DataInputStream;
036import java.io.EOFException;
037import java.io.File;
038import java.io.IOException;
039import java.io.InputStream;
040import java.io.InputStreamReader;
041import java.io.OutputStream;
042import java.io.RandomAccessFile;
043import java.io.Reader;
044import java.io.Writer;
045import java.nio.charset.Charset;
046import com.jcraft.jzlib.GZIPInputStream;
047
048/**
 * Represents a large body of text data.
050 *
051 * <p>
052 * This class defines methods for handling progressive text update.
053 *
054 * <h2>Usage</h2>
055 * <p>
056 *
057 * @author Kohsuke Kawaguchi
058 */
059public class LargeText {
060    /**
061     * Represents the data source of this text.
062     */
063    private interface Source {
064        Session open() throws IOException;
065        long length();
066        boolean exists();
067    }
068    private final Source source;
069
070    protected final Charset charset;
071
072    private volatile boolean completed;
073
074    public LargeText(File file, boolean completed) {
075        this(file,Charset.defaultCharset(),completed);
076    }
077    
078    /**
079     * @since 1.196
080     * 
081     * @param transparentGunzip if set to true, this class will detect if the
082     * given file is compressed with GZIP. If so, it will transparently
083     * uncompress its content during read-access. Do note that the underlying
084     * file is not altered and remains compressed.
085     */
086    public LargeText(File file, boolean completed, boolean transparentGunzip) {
087        this(file, Charset.defaultCharset(), completed, transparentGunzip);
088    }
089
090    public LargeText(final File file, Charset charset, boolean completed) {
091        this(file, charset, completed, false);
092    }
093    
094    /**
095     * @since 1.196
096     * 
097     * @param transparentGunzip if set to true, this class will detect if the
098     * given file is compressed with GZIP. If so, it will transparently
099     * uncompress its content during read-access. Do note that the underlying
100     * file is not altered and remains compressed.
101     */
102    public LargeText(final File file, Charset charset, boolean completed, boolean transparentGunzip) {
103        this.charset = charset;
104        if (transparentGunzip && GzipAwareSession.isGzipStream(file)) {
105            this.source = new Source() {
106                public Session open() throws IOException {
107                    return new GzipAwareSession(file);
108                }
109    
110                public long length() {
111                    return GzipAwareSession.getGzipStreamSize(file);
112                }
113    
114                public boolean exists() {
115                    return file.exists();
116                }
117            };
118        } else {
119            this.source = new Source() {
120                public Session open() throws IOException {
121                    return new FileSession(file);
122                }
123    
124                public long length() {
125                    return file.length();
126                }
127    
128                public boolean exists() {
129                    return file.exists();
130                }
131            };
132        }
133        this.completed = completed;
134    }
135
136    public LargeText(ByteBuffer memory, boolean completed) {
137        this(memory,Charset.defaultCharset(),completed);
138    }
139
140    public LargeText(final ByteBuffer memory, Charset charset, boolean completed) {
141        this.charset = charset;
142        this.source = new Source() {
143            public Session open() throws IOException {
144                return new BufferSession(memory);
145            }
146
147            public long length() {
148                return memory.length();
149            }
150
151            public boolean exists() {
152                return true;
153            }
154        };
155        this.completed = completed;
156    }
157
158    public void markAsComplete() {
159        completed = true;
160    }
161
162    public boolean isComplete() {
163        return completed;
164    }
165
166    public long length() {
167        return source.length();
168    }
169
170    /**
171     * Returns {@link Reader} for reading the raw bytes.
172     */
173    public Reader readAll() throws IOException {
174        return new InputStreamReader(new InputStream() {
175            final Session session = source.open();
176            public int read() throws IOException {
177                byte[] buf = new byte[1];
178                int n = session.read(buf);
179                if(n==1)    return buf[0];
180                else        return -1; // EOF
181            }
182
183            public int read(byte[] buf, int off, int len) throws IOException {
184                return session.read(buf,off,len);
185            }
186
187            public void close() throws IOException {
188                session.close();
189            }
190        },charset);
191    }
192
193    public long writeLogTo(long start, Writer w) throws IOException {
194        return writeLogTo(start, new WriterOutputStream(w, charset));
195    }
196
197    /**
198     * Writes the tail portion of the file to the {@link OutputStream}.
199     *
200     * @param start
201     *      The byte offset in the input file where the write operation starts.
202     *
203     * @return
204     *      if the file is still being written, this method writes the file
205     *      until the last newline character and returns the offset to start
206     *      the next write operation.
207     * @throws EOFException if the start position is larger than the file size
208     */
209    public long writeLogTo(long start, OutputStream out) throws IOException {
210        CountingOutputStream os = new CountingOutputStream(out);
211
212        Session f = source.open();
213        f.skip(start);
214
215        if(completed) {
216            // write everything till EOF
217            byte[] buf = new byte[1024];
218            int sz;
219            while((sz=f.read(buf))>=0)
220                os.write(buf,0,sz);
221        } else {
222            ByteBuf buf = new ByteBuf(null,f);
223            HeadMark head = new HeadMark(buf);
224            TailMark tail = new TailMark(buf);
225            buf = null;
226
227            int readLines = 0;
228            while(tail.moveToNextLine(f) && readLines++ < MAX_LINES_READ) {
229                head.moveTo(tail,os);
230            }
231            head.finish(os);
232        }
233
234        f.close();
235        os.flush();
236
237        return os.getByteCount()+start;
238    }
239
240    /**
241     * Implements the progressive text handling.
242     * This method is used as a "web method" with progressiveText.jelly.
243     */
244    public void doProgressText(StaplerRequest req, StaplerResponse rsp) throws IOException {
245        setContentType(rsp);
246        rsp.setStatus(HttpServletResponse.SC_OK);
247
248        if(!source.exists()) {
249            // file doesn't exist yet
250            rsp.addHeader("X-Text-Size","0");
251            rsp.addHeader("X-More-Data","true");
252            return;
253        }
254
255        long start = 0;
256        String s = req.getParameter("start");
257        if(s!=null)
258            start = Long.parseLong(s);
259
260        if(source.length() < start )
261            start = 0;  // text rolled over
262
263        CharSpool spool = new CharSpool();
264        long r = writeLogTo(start,spool);
265
266        rsp.addHeader("X-Text-Size",String.valueOf(r));
267        if(!completed)
268            rsp.addHeader("X-More-Data","true");
269
270        Writer w = createWriter(req, rsp, r - start);
271        spool.writeTo(new LineEndNormalizingWriter(w));
272        w.close();
273    }
274
275    protected void setContentType(StaplerResponse rsp) {
276        rsp.setContentType("text/plain;charset=UTF-8");
277    }
278
279    protected Writer createWriter(StaplerRequest req, StaplerResponse rsp, long size) throws IOException {
280        // when sending big text, try compression. don't bother if it's small
281        if(size >4096)
282            return rsp.getCompressedWriter(req);
283        else
284            return rsp.getWriter();
285    }
286
287    /**
288     * Points to a byte in the buffer.
289     */
290    private static class Mark {
291        protected ByteBuf buf;
292        protected int pos;
293
294        public Mark(ByteBuf buf) {
295            this.buf = buf;
296        }
297    }
298
299    /**
300     * Points to the start of the region that's not committed
301     * to the output yet.
302     */
303    private static final class HeadMark extends Mark {
304        public HeadMark(ByteBuf buf) {
305            super(buf);
306        }
307
308        /**
309         * Moves this mark to 'that' mark, and writes the data
310         * in between to {@link OutputStream} if necessary.
311         */
312        void moveTo(Mark that, OutputStream os) throws IOException {
313            while(this.buf!=that.buf) {
314                os.write(buf.buf,0,buf.size);
315                buf = buf.next;
316                pos = 0;
317            }
318
319            this.pos = that.pos;
320        }
321
322        void finish(OutputStream os) throws IOException {
323            os.write(buf.buf,0,pos);
324        }
325    }
326
327    /**
328     * Points to the end of the region.
329     */
330    private static final class TailMark extends Mark {
331        public TailMark(ByteBuf buf) {
332            super(buf);
333        }
334
335        boolean moveToNextLine(Session f) throws IOException {
336            while(true) {
337                while(pos==buf.size) {
338                    if(!buf.isFull()) {
339                        // read until EOF
340                        return false;
341                    } else {
342                        // read into the next buffer
343                        buf = new ByteBuf(buf,f);
344                        pos = 0;
345                    }
346                }
347                byte b = buf.buf[pos++];
348                if(b=='\r' || b=='\n')
349                    return true;
350            }
351        }
352    }
353
354    /**
355     * Variable length byte buffer implemented as a linked list of fixed length buffer.
356     */
357    private static final class ByteBuf {
358        private final byte[] buf = new byte[1024];
359        private int size = 0;
360        private ByteBuf next;
361
362        public ByteBuf(ByteBuf previous, Session f) throws IOException {
363            if(previous!=null) {
364                assert previous.next==null;
365                previous.next = this;
366            }
367
368            while(!this.isFull()) {
369                int chunk = f.read(buf, size, buf.length - size);
370                if(chunk==-1)
371                    return;
372                size+= chunk;
373            }
374        }
375
376        public boolean isFull() {
377            return buf.length==size;
378        }
379    }
380
381    /**
382     * Represents the read session of the {@link Source}.
383     * Methods generally follow the contracts of {@link InputStream}.
384     */
385    private interface Session {
386        void close() throws IOException;
387        void skip(long start) throws IOException;
388        int read(byte[] buf) throws IOException;
389        int read(byte[] buf, int offset, int length) throws IOException;
390    }
391
392    /**
393     * {@link Session} implementation over {@link RandomAccessFile}.
394     */
395    private static final class FileSession implements Session {
396        private final RandomAccessFile file;
397
398        public FileSession(File file) throws IOException {
399            this.file = new RandomAccessFile(file,"r");
400        }
401
402        public void close() throws IOException {
403            file.close();
404        }
405
406        public void skip(long start) throws IOException {
407            file.seek(file.getFilePointer()+start);
408        }
409
410        public int read(byte[] buf) throws IOException {
411            return file.read(buf);
412        }
413
414        public int read(byte[] buf, int offset, int length) throws IOException {
415            return file.read(buf,offset,length);
416        }
417    }
418    
419    /**
420     * {@link Session} implementation over {@link GZIPInputStream}.
421     * <p>
422     * Always use {@link GzipAwareSession#isGzipStream(File)} to check if you
423     * really deal with a GZIPed file before you invoke this class. Otherwise,
424     * {@link GZIPInputStream} might throw an exception.
425     */
426    private static final class GzipAwareSession implements Session {
427        private final GZIPInputStream gz;
428
429        public GzipAwareSession(File file) throws IOException {
430            this.gz = new GZIPInputStream(Files.newInputStream(file.toPath(), StandardOpenOption.READ));
431        }
432
433        public void close() throws IOException {
434            gz.close();
435        }
436
437        public void skip(long start) throws IOException {
438            while (start > 0) {
439                start -= gz.skip(start);
440            }
441        }
442
443        public int read(byte[] buf) throws IOException {
444            return gz.read(buf);
445        }
446
447        public int read(byte[] buf, int offset, int length) throws IOException {
448            return gz.read(buf,offset,length);
449        }
450    
451        /**
452         * Checks the first two bytes of the target file and return true if
453         * they equal the GZIP magic number.
454         * @param file
455         * @return true, if the first two bytes are the GZIP magic number.
456         */
457        public static boolean isGzipStream(File file) {
458            try (InputStream in = Files.newInputStream(file.toPath(), StandardOpenOption.READ);
459                 DataInputStream din = new DataInputStream(in)) {
460                return din.readShort()==0x1F8B;
461            } catch (IOException ex) {
462                return false;
463            }
464        }
465        
466        /**
467         * Returns the uncompressed size of the file in a quick, but unreliable
468         * manner. It will not report the correct size if:
469         * <ol>
470         * <li>The compressed size is larger than 2<sup>32</sup> bytes.</li>
471         * <li>The file is broken or truncated.</li>
472         * <li>The file has not been generated by a standard-conformant compressor.</li>
473         * <li>It is a multi-volume GZIP stream.</li>
474         * </ol>
475         * <p>
476         * The advantage of this approach is, that it only reads the first 2
477         * and last 4 bytes of the target file. If the first 2 bytes are not
478         * the GZIP magic number, the raw length of the file is returned.
479         * 
480         * @see #isGzipStream(File)
481         * @param file
482         * @return the size of the uncompressed file content.
483         */
484        public static long getGzipStreamSize(File file) {
485            if (!isGzipStream(file)) {
486                return file.length();
487            }
488            RandomAccessFile raf = null;
489            try {
490                raf = new RandomAccessFile(file, "r");
491                if (raf.length() <= 4) {
492                    raf.close();
493                    return file.length();
494                }
495                raf.seek(raf.length() - 4);
496                int b4 = raf.read();
497                int b3 = raf.read();
498                int b2 = raf.read();
499                int b1 = raf.read();
500                return (b1 << 24) + (b2 << 16) + (b3 << 8) + b4;
501            } catch (IOException ex) {
502                return file.length();
503            } finally {
504                if (raf!=null)
505                    try {
506                        raf.close();
507                    } catch (IOException e) {
508                        // ignore
509                    }
510            }
511        }
512    }
513
514    /**
515     * {@link Session} implementation over {@link ByteBuffer}.
516     */
517    private static final class BufferSession implements Session {
518        private final InputStream in;
519
520        public BufferSession(ByteBuffer buf) {
521            this.in = buf.newInputStream();
522        }
523
524
525        public void close() throws IOException {
526            in.close();
527        }
528
529        public void skip(long start) throws IOException {
530            while (start > 0) {
531                long diff = in.skip(start);
532                if (diff == 0) {
533                    throw new EOFException("Attempting to read past end of buffer");
534                }
535                start -= diff;
536            }
537        }
538
539        public int read(byte[] buf) throws IOException {
540            return in.read(buf);
541        }
542
543        public int read(byte[] buf, int offset, int length) throws IOException {
544            return in.read(buf,offset,length);
545        }
546    }
547
548    /**
549     * We cap the # of lines read in one batch to avoid buffering too much in memory.
550     */
551    private static final int MAX_LINES_READ = 10000;
552}