001/*
002 * $Id: JBIG2SegmentReader.java 4784 2011-03-15 08:33:00Z blowagie $
003 *
004 * This file is part of the iText (R) project.
005 * Copyright (c) 1998-2011 1T3XT BVBA
006 * Authors: Bruno Lowagie, Paulo Soares, et al.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU Affero General Public License version 3
010 * as published by the Free Software Foundation with the addition of the
011 * following permission added to Section 15 as permitted in Section 7(a):
012 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY 1T3XT,
013 * 1T3XT DISCLAIMS THE WARRANTY OF NON INFRINGEMENT OF THIRD PARTY RIGHTS.
014 *
015 * This program is distributed in the hope that it will be useful, but
016 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
017 * or FITNESS FOR A PARTICULAR PURPOSE.
018 * See the GNU Affero General Public License for more details.
019 * You should have received a copy of the GNU Affero General Public License
020 * along with this program; if not, see http://www.gnu.org/licenses or write to
021 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
022 * Boston, MA, 02110-1301 USA, or download the license from the following URL:
023 * http://itextpdf.com/terms-of-use/
024 *
025 * The interactive user interfaces in modified source and object code versions
026 * of this program must display Appropriate Legal Notices, as required under
027 * Section 5 of the GNU Affero General Public License.
028 *
029 * In accordance with Section 7(b) of the GNU Affero General Public License,
030 * a covered work must retain the producer line in every PDF that is created
031 * or manipulated using iText.
032 *
033 * You can be released from the requirements of the license by purchasing
034 * a commercial license. Buying such a license is mandatory as soon as you
035 * develop commercial activities involving the iText software without
036 * disclosing the source code of your own applications.
037 * These activities include: offering paid services to customers as an ASP,
038 * serving PDFs on the fly in a web application, shipping iText with a closed
039 * source product.
040 *
041 * For more information, please contact iText Software Corp. at this
042 * address: sales@itextpdf.com
043 */
044package com.itextpdf.text.pdf.codec;
045
046import java.io.ByteArrayOutputStream;
047import java.io.IOException;
048import java.util.Iterator;
049import java.util.SortedMap;
050import java.util.SortedSet;
051import java.util.TreeMap;
052import java.util.TreeSet;
053
054import com.itextpdf.text.error_messages.MessageLocalization;
055import com.itextpdf.text.pdf.RandomAccessFileOrArray;
056
057/**
058 * Class to read a JBIG2 file at a basic level: understand all the segments,
059 * understand what segments belong to which pages, how many pages there are,
060 * what the width and height of each page is, and global segments if there
061 * are any.  Or: the minimum required to be able to take a normal sequential
062 * or random-access organized file, and be able to embed JBIG2 pages as images
063 * in a PDF.
064 *
065 * TODO: the indeterminate-segment-size value of dataLength, else?
066 *
067 * @since 2.1.5
068 */
069
070public class JBIG2SegmentReader {
071
072        public static final int SYMBOL_DICTIONARY = 0; //see 7.4.2.
073
074        public static final int INTERMEDIATE_TEXT_REGION = 4; //see 7.4.3.
075        public static final int IMMEDIATE_TEXT_REGION = 6; //see 7.4.3.
076        public static final int IMMEDIATE_LOSSLESS_TEXT_REGION = 7; //see 7.4.3.
077        public static final int PATTERN_DICTIONARY = 16; //see 7.4.4.
078        public static final int INTERMEDIATE_HALFTONE_REGION = 20; //see 7.4.5.
079        public static final int IMMEDIATE_HALFTONE_REGION = 22; //see 7.4.5.
080        public static final int IMMEDIATE_LOSSLESS_HALFTONE_REGION = 23; //see 7.4.5.
081        public static final int INTERMEDIATE_GENERIC_REGION = 36; //see 7.4.6.
082        public static final int IMMEDIATE_GENERIC_REGION = 38; //see 7.4.6.
083        public static final int IMMEDIATE_LOSSLESS_GENERIC_REGION = 39; //see 7.4.6.
084        public static final int INTERMEDIATE_GENERIC_REFINEMENT_REGION = 40; //see 7.4.7.
085        public static final int IMMEDIATE_GENERIC_REFINEMENT_REGION = 42; //see 7.4.7.
086        public static final int IMMEDIATE_LOSSLESS_GENERIC_REFINEMENT_REGION = 43; //see 7.4.7.
087
088        public static final int PAGE_INFORMATION = 48; //see 7.4.8.
089        public static final int END_OF_PAGE = 49; //see 7.4.9.
090        public static final int END_OF_STRIPE = 50; //see 7.4.10.
091        public static final int END_OF_FILE = 51; //see 7.4.11.
092        public static final int PROFILES = 52; //see 7.4.12.
093        public static final int TABLES = 53; //see 7.4.13.
094        public static final int EXTENSION = 62; //see 7.4.14.
095
096        private final SortedMap<Integer, JBIG2Segment> segments = new TreeMap<Integer, JBIG2Segment>();
097        private final SortedMap<Integer, JBIG2Page> pages = new TreeMap<Integer, JBIG2Page>();
098        private final SortedSet<JBIG2Segment> globals = new TreeSet<JBIG2Segment>();
099        private RandomAccessFileOrArray ra;
100        private boolean sequential;
101        private boolean number_of_pages_known;
102        private int number_of_pages = -1;
103        private boolean read = false;
104
105        /**
106         * Inner class that holds information about a JBIG2 segment.
107         * @since       2.1.5
108         */
109        public static class JBIG2Segment implements Comparable<JBIG2Segment> {
110
111                public final int segmentNumber;
112                public long dataLength = -1;
113                public int page = -1;
114                public int[] referredToSegmentNumbers = null;
115                public boolean[] segmentRetentionFlags = null;
116                public int type = -1;
117                public boolean deferredNonRetain = false;
118                public int countOfReferredToSegments = -1;
119                public byte[] data = null;
120                public byte[] headerData = null;
121                public boolean page_association_size = false;
122                public int page_association_offset = -1;
123
124                public JBIG2Segment(int segment_number) {
125                        this.segmentNumber = segment_number;
126                }
127
128                public int compareTo(JBIG2Segment s) {
129                        return this.segmentNumber - s.segmentNumber;
130                }
131
132
133        }
134        /**
135         * Inner class that holds information about a JBIG2 page.
136         * @since       2.1.5
137         */
138        public static class JBIG2Page {
139                public final int page;
140                private final JBIG2SegmentReader sr;
141                private final SortedMap<Integer, JBIG2Segment> segs = new TreeMap<Integer, JBIG2Segment>();
142                public int pageBitmapWidth = -1;
143                public int pageBitmapHeight = -1;
144                public JBIG2Page(int page, JBIG2SegmentReader sr) {
145                        this.page = page;
146                        this.sr = sr;
147                }
148                /**
149                 * return as a single byte array the header-data for each segment in segment number
150                 * order, EMBEDDED organization, but i am putting the needed segments in SEQUENTIAL organization.
151                 * if for_embedding, skip the segment types that are known to be not for acrobat.
152                 * @param for_embedding
153                 * @return      a byte array
154                 * @throws IOException
155                 */
156                public byte[] getData(boolean for_embedding) throws IOException {
157                        ByteArrayOutputStream os = new ByteArrayOutputStream();
158                        for (Integer sn : segs.keySet()) {
159                                JBIG2Segment s = segs.get(sn);
160
161                                // pdf reference 1.4, section 3.3.6 JBIG2Decode Filter
162                                // D.3 Embedded organisation
163                                if ( for_embedding &&
164                                                ( s.type == END_OF_FILE || s.type == END_OF_PAGE ) ) {
165                                        continue;
166                                }
167
168                                if ( for_embedding ) {
169                                        // change the page association to page 1
170                                        byte[] headerData_emb = copyByteArray(s.headerData);
171                                        if ( s.page_association_size ) {
172                                                headerData_emb[s.page_association_offset] = 0x0;
173                                                headerData_emb[s.page_association_offset+1] = 0x0;
174                                                headerData_emb[s.page_association_offset+2] = 0x0;
175                                                headerData_emb[s.page_association_offset+3] = 0x1;
176                                        } else {
177                                                headerData_emb[s.page_association_offset] = 0x1;
178                                        }
179                                        os.write(headerData_emb);
180                                } else {
181                                        os.write(s.headerData);
182                                }
183                                os.write(s.data);
184                        }
185                        os.close();
186                        return os.toByteArray();
187                }
188                public void addSegment(JBIG2Segment s) {
189                        segs.put(Integer.valueOf(s.segmentNumber), s);
190                }
191
192        }
193
194        public JBIG2SegmentReader(RandomAccessFileOrArray ra ) throws IOException {
195                this.ra = ra;
196        }
197
198        public static byte[] copyByteArray(byte[] b) {
199                byte[] bc = new byte[b.length];
200                System.arraycopy(b, 0, bc, 0, b.length);
201                return bc;
202        }
203
204        public void read() throws IOException {
205                if ( this.read ) {
206                        throw new IllegalStateException(MessageLocalization.getComposedMessage("already.attempted.a.read.on.this.jbig2.file"));
207                }
208                this.read = true;
209
210                readFileHeader();
211                // Annex D
212                if ( this.sequential ) {
213                        // D.1
214                        do {
215                                JBIG2Segment tmp = readHeader();
216                                readSegment(tmp);
217                                segments.put(Integer.valueOf(tmp.segmentNumber), tmp);
218                        } while ( this.ra.getFilePointer() < this.ra.length() );
219                } else {
220                        // D.2
221                        JBIG2Segment tmp;
222                        do {
223                                tmp = readHeader();
224                                segments.put(Integer.valueOf(tmp.segmentNumber), tmp);
225                        } while ( tmp.type != END_OF_FILE );
226                        Iterator<Integer> segs = segments.keySet().iterator();
227                        while ( segs.hasNext() ) {
228                                readSegment(segments.get(segs.next()));
229                        }
230                }
231        }
232
233        void readSegment(JBIG2Segment s) throws IOException {
234                int ptr = ra.getFilePointer();
235
236                if ( s.dataLength == 0xffffffffl ) {
237                        // TODO figure this bit out, 7.2.7
238                        return;
239                }
240
241                byte[] data = new byte[(int)s.dataLength];
242                ra.read(data);
243                s.data = data;
244
245                if ( s.type == PAGE_INFORMATION ) {
246                        int last = ra.getFilePointer();
247                        ra.seek(ptr);
248                        int page_bitmap_width = ra.readInt();
249                        int page_bitmap_height = ra.readInt();
250                        ra.seek(last);
251                        JBIG2Page p = pages.get(Integer.valueOf(s.page));
252                        if ( p == null ) {
253                                throw new IllegalStateException(MessageLocalization.getComposedMessage("referring.to.widht.height.of.page.we.havent.seen.yet.1", s.page));
254                        }
255
256                        p.pageBitmapWidth = page_bitmap_width;
257                        p.pageBitmapHeight = page_bitmap_height;
258                }
259        }
260
261        JBIG2Segment readHeader() throws IOException {
262                int ptr = ra.getFilePointer();
263                // 7.2.1
264                int segment_number = ra.readInt();
265                JBIG2Segment s = new JBIG2Segment(segment_number);
266
267                // 7.2.3
268                int segment_header_flags = ra.read();
269                boolean deferred_non_retain = ( segment_header_flags & 0x80 ) == 0x80;
270                s.deferredNonRetain = deferred_non_retain;
271                boolean page_association_size = ( segment_header_flags & 0x40 ) == 0x40;
272                int segment_type = segment_header_flags & 0x3f;
273                s.type = segment_type;
274
275                //7.2.4
276                int referred_to_byte0 = ra.read();
277                int count_of_referred_to_segments = (referred_to_byte0 & 0xE0) >> 5;
278                int[] referred_to_segment_numbers = null;
279                boolean[] segment_retention_flags = null;
280
281                if ( count_of_referred_to_segments == 7 ) {
282                        // at least five bytes
283                        ra.seek(ra.getFilePointer() - 1);
284                        count_of_referred_to_segments = ra.readInt() & 0x1fffffff;
285                        segment_retention_flags = new boolean[count_of_referred_to_segments+1];
286                        int i = 0;
287                        int referred_to_current_byte = 0;
288                        do {
289                                int j = i % 8;
290                                if ( j == 0) {
291                                        referred_to_current_byte = ra.read();
292                                }
293                                segment_retention_flags[i] = (0x1 << j & referred_to_current_byte) >> j == 0x1;
294                                i++;
295                        } while ( i <= count_of_referred_to_segments );
296
297                } else if ( count_of_referred_to_segments <= 4 ) {
298                        // only one byte
299                        segment_retention_flags = new boolean[count_of_referred_to_segments+1];
300                        referred_to_byte0 &= 0x1f;
301                        for ( int i = 0; i <= count_of_referred_to_segments; i++ ) {
302                                segment_retention_flags[i] = (0x1 << i & referred_to_byte0) >> i == 0x1;
303                        }
304
305                } else if ( count_of_referred_to_segments == 5 || count_of_referred_to_segments == 6 ) {
306                        throw new IllegalStateException(MessageLocalization.getComposedMessage("count.of.referred.to.segments.had.bad.value.in.header.for.segment.1.starting.at.2", String.valueOf(segment_number), String.valueOf(ptr)));
307                }
308                s.segmentRetentionFlags = segment_retention_flags;
309                s.countOfReferredToSegments = count_of_referred_to_segments;
310
311                // 7.2.5
312                referred_to_segment_numbers = new int[count_of_referred_to_segments+1];
313                for ( int i = 1; i <= count_of_referred_to_segments; i++ ) {
314                        if ( segment_number <= 256 ) {
315                                referred_to_segment_numbers[i] = ra.read();
316                        } else if ( segment_number <= 65536 ) {
317                                referred_to_segment_numbers[i] = ra.readUnsignedShort();
318                        } else {
319                                referred_to_segment_numbers[i] = (int)ra.readUnsignedInt(); // TODO wtf ack
320                        }
321                }
322                s.referredToSegmentNumbers = referred_to_segment_numbers;
323
324                // 7.2.6
325                int segment_page_association;
326                int page_association_offset = ra.getFilePointer() - ptr;
327                if ( page_association_size ) {
328                        segment_page_association = ra.readInt();
329                } else {
330                        segment_page_association = ra.read();
331                }
332                if ( segment_page_association < 0 ) {
333                        throw new IllegalStateException(MessageLocalization.getComposedMessage("page.1.invalid.for.segment.2.starting.at.3", String.valueOf(segment_page_association), String.valueOf(segment_number), String.valueOf(ptr)));
334                }
335                s.page = segment_page_association;
336                // so we can change the page association at embedding time.
337                s.page_association_size = page_association_size;
338                s.page_association_offset = page_association_offset;
339
340                if ( segment_page_association > 0 && ! pages.containsKey(Integer.valueOf(segment_page_association)) ) {
341                        pages.put(Integer.valueOf(segment_page_association), new JBIG2Page(segment_page_association, this));
342                }
343                if ( segment_page_association > 0 ) {
344                        pages.get(Integer.valueOf(segment_page_association)).addSegment(s);
345                } else {
346                        globals.add(s);
347                }
348
349                // 7.2.7
350                long segment_data_length = ra.readUnsignedInt();
351                // TODO the 0xffffffff value that might be here, and how to understand those afflicted segments
352                s.dataLength = segment_data_length;
353
354                int end_ptr = ra.getFilePointer();
355                ra.seek(ptr);
356                byte[] header_data = new byte[end_ptr - ptr];
357                ra.read(header_data);
358                s.headerData  = header_data;
359
360                return s;
361        }
362
363        void readFileHeader() throws IOException {
364                ra.seek(0);
365                byte[] idstring = new byte[8];
366                ra.read(idstring);
367
368                byte[] refidstring = {(byte)0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A};
369
370                for ( int i = 0; i < idstring.length; i++ ) {
371                        if ( idstring[i] != refidstring[i] ) {
372                                throw new IllegalStateException(MessageLocalization.getComposedMessage("file.header.idstring.not.good.at.byte.1", i));
373                        }
374                }
375
376                int fileheaderflags = ra.read();
377
378                this.sequential = ( fileheaderflags & 0x1 ) == 0x1;
379                this.number_of_pages_known = ( fileheaderflags & 0x2) == 0x0;
380
381                if ( (fileheaderflags & 0xfc) != 0x0 ) {
382                        throw new IllegalStateException(MessageLocalization.getComposedMessage("file.header.flags.bits.2.7.not.0"));
383                }
384
385                if ( this.number_of_pages_known ) {
386                        this.number_of_pages = ra.readInt();
387                }
388        }
389
390        public int numberOfPages() {
391                return pages.size();
392        }
393
394        public int getPageHeight(int i) {
395                return pages.get(Integer.valueOf(i)).pageBitmapHeight;
396        }
397
398        public int getPageWidth(int i) {
399                return pages.get(Integer.valueOf(i)).pageBitmapWidth;
400        }
401
402        public JBIG2Page getPage(int page) {
403                return pages.get(Integer.valueOf(page));
404        }
405
406        public byte[] getGlobal(boolean for_embedding) {
407                ByteArrayOutputStream os = new ByteArrayOutputStream();
408                try {
409                        for (Object element : globals) {
410                                JBIG2Segment s = (JBIG2Segment)element;
411                                if ( for_embedding &&
412                                                ( s.type == END_OF_FILE || s.type == END_OF_PAGE ) ) {
413                                        continue;
414                                }
415                                os.write(s.headerData);
416                                os.write(s.data);
417                        }
418                        os.close();
419                } catch (IOException e) {
420                        e.printStackTrace();
421                }
422                if ( os.size() <= 0 ) {
423                        return null;
424                }
425                return os.toByteArray();
426        }
427
428        @Override
429    public String toString() {
430                if ( this.read ) {
431                        return "Jbig2SegmentReader: number of pages: " + this.numberOfPages();
432                } else {
433                        return "Jbig2SegmentReader in indeterminate state.";
434                }
435        }
436}