View Javadoc
1   /*
2   Copyright (c) 2013 James Ahlborn
3   
4   Licensed under the Apache License, Version 2.0 (the "License");
5   you may not use this file except in compliance with the License.
6   You may obtain a copy of the License at
7   
8       http://www.apache.org/licenses/LICENSE-2.0
9   
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15  */
16  
17  package com.healthmarketscience.jackcess.util;
18  
19  import java.io.Closeable;
20  import java.io.File;
21  import java.io.FileInputStream;
22  import java.io.FileNotFoundException;
23  import java.io.IOException;
24  import java.io.InputStream;
25  import java.io.OutputStream;
26  import java.sql.Blob;
27  
28  import com.healthmarketscience.jackcess.impl.OleUtil;
29  
30  /**
31   * Extensions of the Blob interface with additional functionality for working
32   * with the OLE content from an access database.  The ole data type in access
33   * has a wide range of functionality (including wrappers with nested wrappers
34   * with nested filesystems!), and jackcess only supports a small portion of
35   * it.  That said, jackcess should support the bulk of the common
36   * functionality.
37   * <p/>
38   * The main Blob methods will interact with the <i>entire</i> OLE field data
39   * which, in most cases, contains additional wrapper information.  In order to
40   * access the ultimate "content" contained within the OLE data, the {@link
41   * #getContent} method should be used.  The type of this content may be a
42   * variety of formats, so additional sub-interfaces are available to interact
43   * with it.  The most specific sub-interface can be determined by the {@link
44   * ContentType} of the Content.
45   * <p/>
46   * Once an OleBlob is no longer useful, <i>it should be closed</i> using
47   * {@link #free} or {@link #close} methods (after which, the instance will no
48   * longer be functional).
49   * <p/>
50   * Note, the OleBlob implementation is read-only (through the interface).  In
51   * order to modify blob contents, create a new OleBlob instance using {@link
52   * OleBlob.Builder} and write it to the access database.
53   * <p/>
54   * <b>Example for interpreting an existing OLE field:</b>
55   * <pre>
56   *   OleBlob oleBlob = null;
57   *   try {
58   *     oleBlob = row.getBlob("MyOleColumn");
59   *     Content content = oleBlob.getContent();
60   *     if(content.getType() == OleBlob.ContentType.SIMPLE_PACKAGE) {
61   *       FileOutputStream out = ...;
62   *       ((SimplePackageContent)content).writeTo(out);
63   *       out.close();
64   *     }
65   *   } finally {
66   *     if(oleBlob != null) { oleBlob.close(); }
67   *   }     
68   * </pre>
69   * <p/>
70   * <b>Example for creating new, embedded ole data:</b>
71   * <pre>
72   *   OleBlob oleBlob = null;
73   *   try {
74   *     oleBlob = new OleBlob.Builder()
75   *       .setSimplePackage(new File("some_data.txt"))
76   *       .toBlob();
77   *     db.addRow(1, oleBlob);
78   *   } finally {
79   *     if(oleBlob != null) { oleBlob.close(); }
80   *   }     
81   * </pre>
82   * <p/>
83   * <b>Example for creating new, linked ole data:</b>
84   * <pre>
85   *   OleBlob oleBlob = null;
86   *   try {
87   *     oleBlob = new OleBlob.Builder()
88   *       .setLink(new File("some_data.txt"))
89   *       .toBlob();
90   *     db.addRow(1, oleBlob);
91   *   } finally {
92   *     if(oleBlob != null) { oleBlob.close(); }
93   *   }     
94   * </pre>
95   *
96   * @author James Ahlborn
97   */
98  public interface OleBlob extends Blob, Closeable
99  {
100   /** Enum describing the types of blob contents which are currently
101       supported/understood */
102   public enum ContentType {
103     /** the blob contents are a link (file path) to some external content.
104         Content will be an instance of LinkContent */
105     LINK, 
106     /** the blob contents are a simple wrapper around some embedded content
107         and related file names/paths.  Content will be an instance
108         SimplePackageContent */
109     SIMPLE_PACKAGE, 
110     /** the blob contents are a complex embedded data known as compound
111         storage (aka OLE2).  Working with compound storage requires the
112         optional POI library.  Content will be an instance of CompoundContent.
113         If the POI library is not available on the classpath, then compound
114         storage data will instead be returned as type {@link #OTHER}. */
115     COMPOUND_STORAGE,
116     /** the top-level blob wrapper is understood, but the nested blob contents
117         are unknown, probably just some embedded content.  Content will be an
118         instance of OtherContent */
119     OTHER,
120     /** the top-level blob wrapper is not understood (this may not be a valid
121         ole instance).  Content will simply be an instance of Content (the
122         data can be accessed from the main blob instance) */ 
123     UNKNOWN;
124   }
125 
126   /**
127    * Writes the entire raw blob data to the given stream (this is the access
128    * db internal format, which includes all wrapper information).
129    *
130    * @param out stream to which the blob will be written
131    */
132   public void writeTo(OutputStream out) throws IOException;
133 
134   /**
135    * Returns the decoded form of the blob contents, if understandable.
136    */
137   public Content getContent() throws IOException;
138 
139 
140   public interface Content 
141   {    
142     /**
143      * Returns the type of this content.
144      */
145     public ContentType getType();
146 
147     /**
148      * Returns the blob which owns this content.
149      */
150     public OleBlob getBlob();
151   }
152 
153   /**
154    * Intermediate sub-interface for Content which has a nested package.
155    */
156   public interface PackageContent extends Content
157   {    
158     public String getPrettyName() throws IOException;
159 
160     public String getClassName() throws IOException;
161 
162     public String getTypeName() throws IOException;
163   }
164 
165   /**
166    * Intermediate sub-interface for Content which has embedded content.
167    */
168   public interface EmbeddedContent extends Content
169   {
170     public long length();
171 
172     public InputStream getStream() throws IOException;
173 
174     public void writeTo(OutputStream out) throws IOException;    
175   }
176 
177   /**
178    * Sub-interface for Content which has the {@link ContentType#LINK} type.
179    * The actual content is external to the access database and can be found at
180    * {@link #getLinkPath}.
181    */
182   public interface LinkContent extends PackageContent
183   {
184     public String getFileName();
185 
186     public String getLinkPath();
187 
188     public String getFilePath();
189 
190     public InputStream getLinkStream() throws IOException;
191   }
192 
193   /**
194    * Sub-interface for Content which has the {@link
195    * ContentType#SIMPLE_PACKAGE} type.  The actual content is embedded within
196    * the access database (but the original file source path can also be found
197    * at {@link #getFilePath}).
198    */
199   public interface SimplePackageContent 
200     extends PackageContent, EmbeddedContent
201   {
202     public String getFileName();
203 
204     public String getFilePath();
205 
206     public String getLocalFilePath();
207   }
208 
209   /**
210    * Sub-interface for Content which has the {@link
211    * ContentType#COMPOUND_STORAGE} type.  Compound storage is a complex
212    * embedding format also known as OLE2.  In some situations (mostly
213    * non-microsoft office file formats) the actual content is available from
214    * the {@link #getContentsEntry} method (if {@link #hasContentsEntry}
215    * returns {@code true}).  In other situations (e.g. microsoft office file
216    * formats), the actual content is most or all of the compound content (but
217    * retrieving the final file may be a complex operation beyond the scope of
218    * jackcess).  Note that the CompoundContent type will only be available if
219    * the POI library is in the classpath, otherwise compound content will be
220    * returned as OtherContent.
221    */
222   public interface CompoundContent extends PackageContent, EmbeddedContent,
223                                            Iterable<CompoundContent.Entry>
224   {
225     public Entry getEntry(String entryName) throws IOException;
226 
227     public boolean hasContentsEntry() throws IOException;
228 
229     public Entry getContentsEntry() throws IOException;
230 
231     /**
232      * A document entry in the compound storage.
233      */
234     public interface Entry extends EmbeddedContent
235     {
236       public String getName();
237 
238       /**
239        * Returns the CompoundContent which owns this entry.
240        */
241       public CompoundContent getParent();
242     }
243   }  
244 
245   /**
246    * Sub-interface for Content which has the {@link ContentType#OTHER} type.
247    * This may be a simple embedded file or some other, currently not
248    * understood complex type.
249    */
250   public interface OtherContent extends PackageContent, EmbeddedContent
251   {
252   }
253 
254   /**
255    * Builder style class for constructing an OleBlob. See {@link OleBlob} for
256    * example usage.
257    */
258   public class Builder
259   {
260     public static final String PACKAGE_PRETTY_NAME = "Packager Shell Object";
261     public static final String PACKAGE_TYPE_NAME = "Package";
262 
263     private ContentType _type;
264     private byte[] _bytes;
265     private InputStream _stream;
266     private long _contentLen;
267     private String _fileName;
268     private String _filePath;
269     private String _prettyName;
270     private String _className;
271     private String _typeName;
272     
273     public ContentType getType() {
274       return _type;
275     }
276 
277     public byte[] getBytes() {
278       return _bytes;
279     }
280 
281     public InputStream getStream() {
282       return _stream;
283     }
284 
285     public long getContentLength() {
286       return _contentLen;
287     }
288 
289     public String getFileName() {
290       return _fileName;
291     }
292 
293     public String getFilePath() {
294       return _filePath;
295     }
296 
297     public String getPrettyName() {
298       return _prettyName;
299     }
300 
301     public String getClassName() {
302       return _className;
303     }
304     
305     public String getTypeName() {
306       return _typeName;
307     }
308     
309     public Builder setSimplePackageBytes(byte[] bytes) {
310       _bytes = bytes;
311       _contentLen = bytes.length;
312       setDefaultPackageType();
313       _type = ContentType.SIMPLE_PACKAGE;
314       return this;
315     }
316 
317     public Builder setSimplePackageStream(InputStream in, long length) {
318       _stream = in;
319       _contentLen = length;
320       setDefaultPackageType();
321       _type = ContentType.SIMPLE_PACKAGE;
322       return this;
323     }
324 
325     public Builder setSimplePackageFileName(String fileName) {
326       _fileName = fileName;
327       setDefaultPackageType();
328       _type = ContentType.SIMPLE_PACKAGE;
329       return this;
330     }
331 
332     public Builder setSimplePackageFilePath(String filePath) {
333       _filePath = filePath;
334       setDefaultPackageType();
335       _type = ContentType.SIMPLE_PACKAGE;
336       return this;
337     }
338 
339     public Builder setSimplePackage(File f) throws FileNotFoundException {
340       _fileName = f.getName();
341       _filePath = f.getAbsolutePath();
342       return setSimplePackageStream(new FileInputStream(f), f.length());
343     }
344 
345     public Builder setLinkFileName(String fileName) {
346       _fileName = fileName;
347       setDefaultPackageType();
348       _type = ContentType.LINK;
349       return this;
350     }
351 
352     public Builder setLinkPath(String link) {
353       _filePath = link;
354       setDefaultPackageType();
355       _type = ContentType.LINK;
356       return this;
357     }
358 
359     public Builder setLink(File f) {
360       _fileName = f.getName();
361       _filePath = f.getAbsolutePath();
362       setDefaultPackageType();
363       _type = ContentType.LINK;
364       return this;
365     }
366 
367     private void setDefaultPackageType() {
368       if(_prettyName == null) {
369         _prettyName = PACKAGE_PRETTY_NAME;
370       }
371       if(_className == null) {
372         _className = PACKAGE_TYPE_NAME;
373       }
374     }
375 
376     public Builder setOtherBytes(byte[] bytes) {
377       _bytes = bytes;
378       _contentLen = bytes.length;
379       _type = ContentType.OTHER;
380       return this;
381     }
382 
383     public Builder setOtherStream(InputStream in, long length) {
384       _stream = in;
385       _contentLen = length;
386       _type = ContentType.OTHER;
387       return this;
388     }
389 
390     public Builder setOther(File f) throws FileNotFoundException {
391       return setOtherStream(new FileInputStream(f), f.length());
392     }
393 
394     public Builder setPackagePrettyName(String prettyName) {
395       _prettyName = prettyName;
396       return this;
397     }
398 
399     public Builder setPackageClassName(String className) {
400       _className = className;
401       return this;
402     }
403 
404     public Builder setPackageTypeName(String typeName) {
405       _typeName = typeName;
406       return this;
407     }
408 
409     public OleBlob toBlob() throws IOException {
410       return OleUtil.createBlob(this);
411     }
412 
413     public static OleBlob fromInternalData(byte[] bytes) throws IOException {
414       return OleUtil.parseBlob(bytes);
415     }
416   }
417 }