View Javadoc
1   /*
2   Copyright (c) 2008 Health Market Science, Inc.
3   
4   Licensed under the Apache License, Version 2.0 (the "License");
5   you may not use this file except in compliance with the License.
6   You may obtain a copy of the License at
7   
8       http://www.apache.org/licenses/LICENSE-2.0
9   
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15  */
16  
17  package com.healthmarketscience.jackcess.impl;
18  
19  import java.io.BufferedReader;
20  import java.io.IOException;
21  import java.io.InputStreamReader;
22  import java.util.Arrays;
23  import java.util.HashMap;
24  import java.util.Map;
25  
26  import static com.healthmarketscience.jackcess.impl.ByteUtil.ByteStream;
27  
28  /**
29   * Various constants used for creating "general legacy" (access 2000-2007)
30   * sort order text index entries.
31   *
32   * @author James Ahlborn
33   */
34  public class GeneralLegacyIndexCodes {
35  
36    static final int MAX_TEXT_INDEX_CHAR_LENGTH =
37      (JetFormat.TEXT_FIELD_MAX_LENGTH / JetFormat.TEXT_FIELD_UNIT_SIZE);
38  
39    static final byte END_TEXT = (byte)0x01;
40    static final byte END_EXTRA_TEXT = (byte)0x00;
41  
42    // unprintable char is removed from normal text.
43    // pattern for unprintable chars in the extra bytes:
44    // 01 01 01 <pos> 06  <code> )
45    // <pos> = 7 + (4 * char_pos) | 0x8000 (as short)
46    // <code> = char code
47    static final int UNPRINTABLE_COUNT_START = 7;
48    static final int UNPRINTABLE_COUNT_MULTIPLIER = 4;
49    static final int UNPRINTABLE_OFFSET_FLAGS = 0x8000;
50    static final byte UNPRINTABLE_MIDFIX = (byte)0x06;
51  
52    // international char is replaced with ascii char.
53    // pattern for international chars in the extra bytes:
54    // [ 02 (for each normal char) ] [ <symbol_code> (for each inat char) ]
55    static final byte INTERNATIONAL_EXTRA_PLACEHOLDER = (byte)0x02;  
56  
57    // see Index.writeCrazyCodes for details on writing crazy codes
58    static final byte CRAZY_CODE_START = (byte)0x80;
59    static final byte CRAZY_CODE_1 = (byte)0x02;
60    static final byte CRAZY_CODE_2 = (byte)0x03;
61    static final byte[] CRAZY_CODES_SUFFIX = 
62      new byte[]{(byte)0xFF, (byte)0x02, (byte)0x80, (byte)0xFF, (byte)0x80};
63    static final byte CRAZY_CODES_UNPRINT_SUFFIX = (byte)0xFF;
64  
65    // stash the codes in some resource files
66    private static final String CODES_FILE = 
67      DatabaseImpl.RESOURCE_PATH + "index_codes_genleg.txt";
68    private static final String EXT_CODES_FILE = 
69      DatabaseImpl.RESOURCE_PATH + "index_codes_ext_genleg.txt";
70  
71    /**
72     * Enum which classifies the types of char encoding strategies used when
73     * creating text index entries.
74     */
75    enum Type {
76      SIMPLE("S") {
77        @Override public CharHandler parseCodes(String[] codeStrings) {
78          return parseSimpleCodes(codeStrings);
79        }
80      },
81      INTERNATIONAL("I") {
82        @Override public CharHandler parseCodes(String[] codeStrings) {
83          return parseInternationalCodes(codeStrings);
84        }
85      },
86      UNPRINTABLE("U") {
87        @Override public CharHandler parseCodes(String[] codeStrings) {
88          return parseUnprintableCodes(codeStrings);
89        }
90      },
91      UNPRINTABLE_EXT("P") {
92        @Override public CharHandler parseCodes(String[] codeStrings) {
93          return parseUnprintableExtCodes(codeStrings);
94        }
95      },
96      INTERNATIONAL_EXT("Z") {
97        @Override public CharHandler parseCodes(String[] codeStrings) {
98          return parseInternationalExtCodes(codeStrings);
99        }
100     },
101     IGNORED("X") {
102       @Override public CharHandler parseCodes(String[] codeStrings) {
103         return IGNORED_CHAR_HANDLER;
104       }
105     };
106 
107     private final String _prefixCode;
108 
109     private Type(String prefixCode) {
110       _prefixCode = prefixCode;
111     }
112 
113     public String getPrefixCode() {
114       return _prefixCode;
115     }
116 
117     public abstract CharHandler parseCodes(String[] codeStrings);
118   }
119 
120   /**
121    * Base class for the handlers which hold the text index character encoding
122    * information.
123    */
124   abstract static class CharHandler {
125     public abstract Type getType();
126     public byte[] getInlineBytes() {
127       return null;
128     }
129     public byte[] getExtraBytes() {
130       return null;
131     }
132     public byte[] getUnprintableBytes() {
133       return null;
134     }
135     public byte getExtraByteModifier() {
136       return 0;
137     }
138     public byte getCrazyFlag() {
139       return 0;
140     }
141   }
142 
143   /**
144    * CharHandler for Type.SIMPLE
145    */
146   private static final class SimpleCharHandler extends CharHandler {
147     private byte[] _bytes;
148     private SimpleCharHandler(byte[] bytes) {
149       _bytes = bytes;
150     }
151     @Override public Type getType() {
152       return Type.SIMPLE;
153     }
154     @Override public byte[] getInlineBytes() {
155       return _bytes;
156     }
157   }
158 
159   /**
160    * CharHandler for Type.INTERNATIONAL
161    */
162   private static final class InternationalCharHandler extends CharHandler {
163     private byte[] _bytes;
164     private byte[] _extraBytes;
165     private InternationalCharHandler(byte[] bytes, byte[] extraBytes) {
166       _bytes = bytes;
167       _extraBytes = extraBytes;
168     }
169     @Override public Type getType() {
170       return Type.INTERNATIONAL;
171     }
172     @Override public byte[] getInlineBytes() {
173       return _bytes;
174     }
175     @Override public byte[] getExtraBytes() {
176       return _extraBytes;
177     }
178   }
179 
180   /**
181    * CharHandler for Type.UNPRINTABLE
182    */
183   private static final class UnprintableCharHandler extends CharHandler {
184     private byte[] _unprintBytes;
185     private UnprintableCharHandler(byte[] unprintBytes) {
186       _unprintBytes = unprintBytes;
187     }
188     @Override public Type getType() {
189       return Type.UNPRINTABLE;
190     }
191     @Override public byte[] getUnprintableBytes() {
192       return _unprintBytes;
193     }
194   }
195 
196   /**
197    * CharHandler for Type.UNPRINTABLE_EXT
198    */
199   private static final class UnprintableExtCharHandler extends CharHandler {
200     private byte _extraByteMod;
201     private UnprintableExtCharHandler(Byte extraByteMod) {
202       _extraByteMod = extraByteMod;
203     }
204     @Override public Type getType() {
205       return Type.UNPRINTABLE_EXT;
206     }
207     @Override public byte getExtraByteModifier() {
208       return _extraByteMod;
209     }
210   }
211 
212   /**
213    * CharHandler for Type.INTERNATIONAL_EXT
214    */
215   private static final class InternationalExtCharHandler extends CharHandler {
216     private byte[] _bytes;
217     private byte[] _extraBytes;
218     private byte _crazyFlag;
219     private InternationalExtCharHandler(byte[] bytes, byte[] extraBytes,
220                                         byte crazyFlag) {
221       _bytes = bytes;
222       _extraBytes = extraBytes;
223       _crazyFlag = crazyFlag;
224     }
225     @Override public Type getType() {
226       return Type.INTERNATIONAL_EXT;
227     }
228     @Override public byte[] getInlineBytes() {
229       return _bytes;
230     }
231     @Override public byte[] getExtraBytes() {
232       return _extraBytes;
233     }
234     @Override public byte getCrazyFlag() {
235       return _crazyFlag;
236     }
237   }
238 
239   /** shared CharHandler instance for Type.IGNORED */
240   static final CharHandler IGNORED_CHAR_HANDLER = new CharHandler() {
241     @Override public Type getType() {
242       return Type.IGNORED;
243     }
244   };
245 
246   /** alternate shared CharHandler instance for "surrogate" chars (which we do
247       not handle) */
248   static final CharHandler SURROGATE_CHAR_HANDLER = new CharHandler() {
249     @Override public Type getType() {
250       return Type.IGNORED;
251     }
252     @Override public byte[] getInlineBytes() {
253       throw new IllegalStateException(
254           "Surrogate pair chars are not handled");
255     }
256   };
257 
258   static final char FIRST_CHAR = (char)0x0000;
259   static final char LAST_CHAR = (char)0x00FF;
260   static final char FIRST_EXT_CHAR = LAST_CHAR + 1;
261   static final char LAST_EXT_CHAR = (char)0xFFFF;
262 
263   private static final class Codes
264   {
265     /** handlers for the first 256 chars.  use nested class to lazy load the
266         handlers */
267     private static final CharHandler[] _values = loadCodes(
268         CODES_FILE, FIRST_CHAR, LAST_CHAR);
269   }
270   
271   private static final class ExtCodes
272   {
273     /** handlers for the rest of the chars in BMP 0.  use nested class to
274         lazy load the handlers */
275     private static final CharHandler[] _values = loadCodes(
276         EXT_CODES_FILE, FIRST_EXT_CHAR, LAST_EXT_CHAR);
277   }
278 
/** shared instance of the "general legacy" index codes */
static final GeneralLegacyIndexCodes GEN_LEG_INSTANCE =
  new GeneralLegacyIndexCodes();

/**
 * Creates a new instance.  Nothing is initialized here, the code tables
 * live in static holder classes.
 */
GeneralLegacyIndexCodes() {
  // nothing to do
}
284 
285   /**
286    * Returns the CharHandler for the given character.
287    */
288   CharHandler getCharHandler(char c)
289   {
290     if(c <= LAST_CHAR) {
291       return Codes._values[c];
292     }
293 
294     int extOffset = asUnsignedChar(c) - asUnsignedChar(FIRST_EXT_CHAR);
295     return ExtCodes._values[extOffset];
296   }
297 
298   /**
299    * Loads the CharHandlers for the given range of characters from the
300    * resource file with the given name.
301    */
302   static CharHandler[] loadCodes(String codesFilePath,
303                                  char firstChar, char lastChar)
304   {
305     int numCodes = (asUnsignedChar(lastChar) - asUnsignedChar(firstChar)) + 1;
306     CharHandler[] values = new CharHandler[numCodes];
307 
308     Map<String,Type> prefixMap = new HashMap<String,Type>();
309     for(Type type : Type.values()) {
310       prefixMap.put(type.getPrefixCode(), type);
311     }
312 
313     BufferedReader reader = null;
314     try {
315 
316       reader = new BufferedReader(
317           new InputStreamReader(
318               DatabaseImpl.getResourceAsStream(codesFilePath), "US-ASCII"));
319       
320       int start = asUnsignedChar(firstChar);
321       int end = asUnsignedChar(lastChar);
322       for(int i = start; i <= end; ++i) {
323         char c = (char)i;
324         CharHandler ch = null;
325         if(Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
326           // surrogate chars are not included in the codes files
327           ch = SURROGATE_CHAR_HANDLER;
328         } else {
329           String codeLine = reader.readLine();
330           ch = parseCodes(prefixMap, codeLine);
331         }
332         values[(i - start)] = ch;
333       }
334 
335     } catch(IOException e) {
336       throw new RuntimeException("failed loading index codes file " +
337                                  codesFilePath, e);
338     } finally {
339       ByteUtil.closeQuietly(reader);
340     }
341 
342     return values;
343   }
344 
345   /**
346    * Returns a CharHandler parsed from the given line from an index codes
347    * file.
348    */
349   private static CharHandler parseCodes(Map<String,Type> prefixMap,
350                                         String codeLine)
351   {
352     String prefix = codeLine.substring(0, 1);
353     String suffix = ((codeLine.length() > 1) ? codeLine.substring(1) : "");
354     return prefixMap.get(prefix).parseCodes(suffix.split(",", -1));
355   }
356 
357   /**
358    * Returns a SimpleCharHandler parsed from the given index code strings.
359    */
360   private static CharHandler parseSimpleCodes(String[] codeStrings) 
361   {
362     if(codeStrings.length != 1) {
363       throw new IllegalStateException("Unexpected code strings " +
364                                       Arrays.asList(codeStrings));
365     }
366     return new SimpleCharHandler(codesToBytes(codeStrings[0], true));
367   }
368 
369   /**
370    * Returns an InternationalCharHandler parsed from the given index code
371    * strings.
372    */
373   private static CharHandler parseInternationalCodes(String[] codeStrings)
374   {
375     if(codeStrings.length != 2) {
376       throw new IllegalStateException("Unexpected code strings " +
377                                       Arrays.asList(codeStrings));
378     }
379     return new InternationalCharHandler(codesToBytes(codeStrings[0], true),
380                                         codesToBytes(codeStrings[1], true));
381   }
382 
383   /**
384    * Returns a UnprintableCharHandler parsed from the given index code
385    * strings.
386    */
387   private static CharHandler parseUnprintableCodes(String[] codeStrings)
388   {
389     if(codeStrings.length != 1) {
390       throw new IllegalStateException("Unexpected code strings " +
391                                       Arrays.asList(codeStrings));
392     }
393     return new UnprintableCharHandler(codesToBytes(codeStrings[0], true));
394   }
395 
396   /**
397    * Returns a UnprintableExtCharHandler parsed from the given index code
398    * strings.
399    */
400   private static CharHandler parseUnprintableExtCodes(String[] codeStrings) 
401   {
402     if(codeStrings.length != 1) {
403       throw new IllegalStateException("Unexpected code strings " +
404                                       Arrays.asList(codeStrings));
405     }
406     byte[] bytes = codesToBytes(codeStrings[0], true);
407     if(bytes.length != 1) {
408       throw new IllegalStateException("Unexpected code strings " +
409                                       Arrays.asList(codeStrings));
410     }
411     return new UnprintableExtCharHandler(bytes[0]);
412   }
413 
414   /**
415    * Returns a InternationalExtCharHandler parsed from the given index code
416    * strings.
417    */
418   private static CharHandler parseInternationalExtCodes(String[] codeStrings) 
419   {
420     if(codeStrings.length != 3) {
421       throw new IllegalStateException("Unexpected code strings " +
422                                       Arrays.asList(codeStrings));
423     }
424 
425     byte crazyFlag = ("1".equals(codeStrings[2]) ?
426                       CRAZY_CODE_1 : CRAZY_CODE_2);
427     return new InternationalExtCharHandler(codesToBytes(codeStrings[0], true),
428                                            codesToBytes(codeStrings[1], false),
429                                            crazyFlag);
430   }
431 
432   /**
433    * Converts a string of hex encoded bytes to a byte[], optionally throwing
434    * an exception if no codes are given.
435    */
436   private static byte[] codesToBytes(String codes, boolean required)
437   {
438     if(codes.length() == 0) {
439       if(required) {
440         throw new IllegalStateException("empty code bytes");
441       }
442       return null;
443     }
444     if((codes.length() % 2) != 0) {
445       // stripped a leading 0
446       codes = "0" + codes;
447     }
448     byte[] bytes = new byte[codes.length() / 2];
449     for(int i = 0; i < bytes.length; ++i) {
450       int charIdx = i*2;
451       bytes[i] = (byte)(Integer.parseInt(codes.substring(charIdx, charIdx + 2),
452                                          16));
453     }
454     return bytes;
455   }
456 
457   /**
458    * Returns an the char value converted to an unsigned char value.  Note, I
459    * think this is unnecessary (I think java treats chars as unsigned), but I
460    * did this just to be on the safe side.
461    */
462   static int asUnsignedChar(char c)
463   {
464     return c & 0xFFFF;
465   }
466 
467   /**
468    * Converts an index value for a text column into the entry value (which
469    * is based on a variety of nifty codes).
470    */
471   void writeNonNullIndexTextValue(
472       Object value, ByteStream bout, boolean isAscending)
473     throws IOException
474   {
475     // first, convert to string
476     String str = ColumnImpl.toCharSequence(value).toString();
477 
478     // all text columns (including memos) are only indexed up to the max
479     // number of chars in a VARCHAR column
480     if(str.length() > MAX_TEXT_INDEX_CHAR_LENGTH) {
481       str = str.substring(0, MAX_TEXT_INDEX_CHAR_LENGTH);
482     }
483 
484     // record pprevious entry length so we can do any post-processing
485     // necessary for this entry (handling descending)
486     int prevLength = bout.getLength();
487     
488     // now, convert each character to a "code" of one or more bytes
489     ExtraCodesStream extraCodes = null;
490     ByteStream unprintableCodes = null;
491     ByteStream crazyCodes = null;
492     int charOffset = 0;
493     for(int i = 0; i < str.length(); ++i) {
494 
495       char c = str.charAt(i);
496       CharHandler ch = getCharHandler(c);
497 
498       int curCharOffset = charOffset;
499       byte[] bytes = ch.getInlineBytes();
500       if(bytes != null) {
501         // write the "inline" codes immediately
502         bout.write(bytes);
503 
504         // only increment the charOffset for chars with inline codes
505         ++charOffset;
506       }
507 
508       if(ch.getType() == Type.SIMPLE) {
509         // common case, skip further code handling
510         continue;
511       }
512 
513       bytes = ch.getExtraBytes();
514       byte extraCodeModifier = ch.getExtraByteModifier();
515       if((bytes != null) || (extraCodeModifier != 0)) {
516         if(extraCodes == null) {
517           extraCodes = new ExtraCodesStream(str.length());
518         }
519 
520         // keep track of the extra codes for later
521         writeExtraCodes(curCharOffset, bytes, extraCodeModifier, extraCodes);
522       }
523 
524       bytes = ch.getUnprintableBytes();
525       if(bytes != null) {
526         if(unprintableCodes == null) {
527           unprintableCodes = new ByteStream();
528         }
529           
530         // keep track of the unprintable codes for later
531         writeUnprintableCodes(curCharOffset, bytes, unprintableCodes,
532                               extraCodes);
533       }
534       
535       byte crazyFlag = ch.getCrazyFlag();
536       if(crazyFlag != 0) {
537         if(crazyCodes == null) {
538           crazyCodes = new ByteStream();
539         }
540 
541         // keep track of the crazy flags for later
542         crazyCodes.write(crazyFlag);
543       }
544     }
545 
546     // write end text flag
547     bout.write(END_TEXT);
548 
549     boolean hasExtraCodes = trimExtraCodes(
550         extraCodes, (byte)0, INTERNATIONAL_EXTRA_PLACEHOLDER);
551     boolean hasUnprintableCodes = (unprintableCodes != null);
552     boolean hasCrazyCodes = (crazyCodes != null);
553     if(hasExtraCodes || hasUnprintableCodes || hasCrazyCodes) {
554 
555       // we write all the international extra bytes first
556       if(hasExtraCodes) {
557         extraCodes.writeTo(bout);
558       }
559 
560       if(hasCrazyCodes || hasUnprintableCodes) {
561 
562         // write 2 more end flags
563         bout.write(END_TEXT);
564         bout.write(END_TEXT);
565 
566         // next come the crazy flags
567         if(hasCrazyCodes) {
568 
569           writeCrazyCodes(crazyCodes, bout);
570 
571           // if we are writing unprintable codes after this, tack on another
572           // code
573           if(hasUnprintableCodes) {
574             bout.write(CRAZY_CODES_UNPRINT_SUFFIX);
575           }
576         }
577 
578         // then we write all the unprintable extra bytes
579         if(hasUnprintableCodes) {
580 
581           // write another end flag
582           bout.write(END_TEXT);
583         
584           unprintableCodes.writeTo(bout);
585         }
586       }
587     }
588 
589     // handle descending order by inverting the bytes
590     if(!isAscending) {
591 
592       // we actually write the end byte before flipping the bytes, and write
593       // another one after flipping
594       bout.write(END_EXTRA_TEXT);
595       
596       // flip the bytes that we have written thus far for this text value
597       IndexData.flipBytes(bout.getBytes(), prevLength, 
598                           (bout.getLength() - prevLength));
599     }
600 
601     // write end extra text
602     bout.write(END_EXTRA_TEXT);    
603   }
604 
605   /**
606    * Encodes the given extra code info in the given stream.
607    */
608   private static void writeExtraCodes(
609       int charOffset, byte[] bytes, byte extraCodeModifier,
610       ExtraCodesStream extraCodes)
611     throws IOException
612   {
613     // we fill in a placeholder value for any chars w/out extra codes
614     int numChars = extraCodes.getNumChars();
615     if(numChars < charOffset) {
616       int fillChars = charOffset - numChars;
617       extraCodes.writeFill(fillChars, INTERNATIONAL_EXTRA_PLACEHOLDER);
618       extraCodes.incrementNumChars(fillChars);
619     }
620 
621     if(bytes != null) {
622       
623       // write the actual extra codes and update the number of chars
624       extraCodes.write(bytes);
625       extraCodes.incrementNumChars(1);
626 
627     } else {
628 
629       // extra code modifiers modify the existing extra code bytes and do not
630       // count as additional extra code chars
631       int lastIdx = extraCodes.getLength() - 1;
632       if(lastIdx >= 0) {
633 
634         // the extra code modifier is added to the last extra code written
635         byte lastByte = extraCodes.get(lastIdx);
636         lastByte += extraCodeModifier;
637         extraCodes.set(lastIdx, lastByte);
638 
639       } else {
640 
641         // there is no previous extra code, add a new code (but keep track of
642         // this "unprintable code" prefix)
643         extraCodes.write(extraCodeModifier);
644         extraCodes.setUnprintablePrefixLen(1);
645       }
646     }
647   }
648 
649   /**
650    * Trims any bytes in the given range off of the end of the given stream,
651    * returning whether or not there are any bytes left in the given stream
652    * after trimming.
653    */
654   private static boolean trimExtraCodes(ByteStream extraCodes,
655                                         byte minTrimCode, byte maxTrimCode)
656     throws IOException
657   {
658     if(extraCodes == null) {
659       return false;
660     }
661 
662     extraCodes.trimTrailing(minTrimCode, maxTrimCode);
663 
664     // anything left?
665     return (extraCodes.getLength() > 0);
666   }
667 
668   /**
669    * Encodes the given unprintable char codes in the given stream.
670    */
671   private static void writeUnprintableCodes(
672       int charOffset, byte[] bytes, ByteStream unprintableCodes,
673       ExtraCodesStream extraCodes)
674     throws IOException
675   {
676     // the offset seems to be calculated based on the number of bytes in the
677     // "extra codes" part of the entry (even if there are no extra codes bytes
678     // actually written in the final entry).
679     int unprintCharOffset = charOffset;
680     if(extraCodes != null) {
681       // we need to account for some extra codes which have not been written
682       // yet.  additionally, any unprintable bytes added to the beginning of
683       // the extra codes are ignored.
684       unprintCharOffset = extraCodes.getLength() +
685         (charOffset - extraCodes.getNumChars()) -
686         extraCodes.getUnprintablePrefixLen();
687     }
688 
689     // we write a whacky combo of bytes for each unprintable char which
690     // includes a funky offset and extra char itself
691     int offset =
692       (UNPRINTABLE_COUNT_START +
693        (UNPRINTABLE_COUNT_MULTIPLIER * unprintCharOffset))
694       | UNPRINTABLE_OFFSET_FLAGS;
695 
696     // write offset as big-endian short
697     unprintableCodes.write((offset >> 8) & 0xFF);
698     unprintableCodes.write(offset & 0xFF);
699           
700     unprintableCodes.write(UNPRINTABLE_MIDFIX);
701     unprintableCodes.write(bytes);
702   }
703 
704   /**
705    * Encode the given crazy code bytes into the given byte stream.
706    */
707   private static void writeCrazyCodes(ByteStream crazyCodes, ByteStream bout)
708     throws IOException
709   {
710     // CRAZY_CODE_2 flags at the end are ignored, so ditch them
711     trimExtraCodes(crazyCodes, CRAZY_CODE_2, CRAZY_CODE_2);
712 
713     if(crazyCodes.getLength() > 0) {
714 
715       // the crazy codes get encoded into 6 bit sequences where each code is 2
716       // bits (where the first 2 bits in the byte are a common prefix).
717       byte curByte = CRAZY_CODE_START;
718       int idx = 0;
719       for(int i = 0; i < crazyCodes.getLength(); ++i) {
720         byte nextByte = crazyCodes.get(i);
721         nextByte <<= ((2 - idx) * 2);
722         curByte |= nextByte;
723 
724         ++idx;
725         if(idx == 3) {
726           // write current byte and reset
727           bout.write(curByte);
728           curByte = CRAZY_CODE_START;
729           idx = 0;
730         }
731       }
732 
733       // write last byte
734       if(idx > 0) {
735         bout.write(curByte);
736       }
737     }
738 
739     // write crazy code suffix (note, we write this even if all the codes are
740     // trimmed
741     bout.write(CRAZY_CODES_SUFFIX);
742   }
743 
744   /**
745    * Extension of ByteStream which keeps track of an additional char count and
746    * the length of any "unprintable" code prefix.
747    */
748   private static final class ExtraCodesStream extends ByteStream
749   {
750     private int _numChars;
751     private int _unprintablePrefixLen; 
752 
753     private ExtraCodesStream(int length) {
754       super(length);
755     }
756 
757     public int getNumChars() {
758       return _numChars;
759     }
760     
761     public void incrementNumChars(int inc) {
762       _numChars += inc;
763     }
764 
765     public int getUnprintablePrefixLen() {
766       return _unprintablePrefixLen;
767     }
768 
769     public void setUnprintablePrefixLen(int len) {
770       _unprintablePrefixLen = len;
771     }
772   }
773 
774 }