View Javadoc
1   /*
2   Copyright (c) 2007 Health Market Science, Inc.
3   
4   Licensed under the Apache License, Version 2.0 (the "License");
5   you may not use this file except in compliance with the License.
6   You may obtain a copy of the License at
7   
8       http://www.apache.org/licenses/LICENSE-2.0
9   
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15   */
16  
17  package com.healthmarketscience.jackcess.util;
18  
19  import java.io.BufferedReader;
20  import java.io.EOFException;
21  import java.io.File;
22  import java.io.FileReader;
23  import java.io.IOException;
24  import java.sql.ResultSet;
25  import java.sql.ResultSetMetaData;
26  import java.sql.SQLException;
27  import java.util.ArrayList;
28  import java.util.LinkedList;
29  import java.util.List;
30  import java.util.regex.Matcher;
31  import java.util.regex.Pattern;
32  
33  import com.healthmarketscience.jackcess.ColumnBuilder;
34  import com.healthmarketscience.jackcess.DataType;
35  import com.healthmarketscience.jackcess.Database;
36  import com.healthmarketscience.jackcess.Table;
37  import com.healthmarketscience.jackcess.TableBuilder;
38  import com.healthmarketscience.jackcess.impl.ByteUtil;
39  import com.healthmarketscience.jackcess.impl.DatabaseImpl;
40  
41  /**
42   * Utility class for importing tables to an Access database from other
43   * sources.  See the {@link Builder} for convenient configuration of the
44   * import functionality.  Note that most scenarios for customizing input data
45   * can be handled by implementing a custom {@link ImportFilter}.
46   *
47   * @author James Ahlborn
48   * @usage _general_class_
49   */
50  public class ImportUtil 
51  {
52    /** Batch commit size for copying other result sets into this database */
53    private static final int COPY_TABLE_BATCH_SIZE = 200;
54  
55    /** the platform line separator */
56    static final String LINE_SEPARATOR = System.getProperty("line.separator");
57    
58    private ImportUtil() {}
59  
60    /**
61     * Returns a List of Column instances converted from the given
62     * ResultSetMetaData (this is the same method used by the various {@code
63     * importResultSet()} methods).
64     *
65     * @return a List of Columns
66     */
67    public static List<ColumnBuilder> toColumns(ResultSetMetaData md)
68      throws SQLException
69    {
70        List<ColumnBuilder> columns = new LinkedList<ColumnBuilder>();
71        for (int i = 1; i <= md.getColumnCount(); i++) {
72          ColumnBuilder column = new ColumnBuilder(md.getColumnName(i))
73            .escapeName();
74          int lengthInUnits = md.getColumnDisplaySize(i);
75          column.setSQLType(md.getColumnType(i), lengthInUnits);
76          DataType type = column.getType();
77          // we check for isTrueVariableLength here to avoid setting the length
78          // for a NUMERIC column, which pretends to be var-len, even though it
79          // isn't
80          if(type.isTrueVariableLength() && !type.isLongValue()) {
81            column.setLengthInUnits((short)lengthInUnits);
82          }
83          if(type.getHasScalePrecision()) {
84            int scale = md.getScale(i);
85            int precision = md.getPrecision(i);
86            if(type.isValidScale(scale)) {
87              column.setScale((byte)scale);
88            }
89            if(type.isValidPrecision(precision)) {
90              column.setPrecision((byte)precision);
91            }
92          }
93          columns.add(column);
94        }
95        return columns;
96    }
97  
98    /**
99     * Copy an existing JDBC ResultSet into a new table in this database.
100    * <p>
101    * Equivalent to:
102    * {@code  importResultSet(source, db, name, SimpleImportFilter.INSTANCE);}
103    * 
104    * @param name Name of the new table to create
105    * @param source ResultSet to copy from
106    *
107    * @return the name of the copied table
108    *
109    * @see #importResultSet(ResultSet,Database,String,ImportFilter)
110    * @see Builder
111    */
112   public static String importResultSet(ResultSet source, Database db,
113                                        String name)
114     throws SQLException, IOException
115   {
116     return importResultSet(source, db, name, SimpleImportFilter.INSTANCE);
117   }
118   
119   /**
120    * Copy an existing JDBC ResultSet into a new table in this database.
121    * <p>
122    * Equivalent to:
123    * {@code  importResultSet(source, db, name, filter, false);}
124    * 
125    * @param name Name of the new table to create
126    * @param source ResultSet to copy from
127    * @param filter valid import filter
128    *
129    * @return the name of the imported table
130    *
131    * @see #importResultSet(ResultSet,Database,String,ImportFilter,boolean)
132    * @see Builder
133    */
134   public static String importResultSet(ResultSet source, Database db,
135                                        String name, ImportFilter filter)
136     throws SQLException, IOException
137   {
138     return importResultSet(source, db, name, filter, false);
139   }
140    
141   /**
142    * Copy an existing JDBC ResultSet into a new (or optionally existing) table
143    * in this database.
144    * 
145    * @param name Name of the new table to create
146    * @param source ResultSet to copy from
147    * @param filter valid import filter
148    * @param useExistingTable if {@code true} use current table if it already
149    *                         exists, otherwise, create new table with unique
150    *                         name
151    *
152    * @return the name of the imported table
153    * 
154    * @see Builder
155    */
156   public static String importResultSet(ResultSet source, Database db,
157                                        String name, ImportFilter filter,
158                                        boolean useExistingTable)
159     throws SQLException, IOException
160   {
161     ResultSetMetaData md = source.getMetaData();
162 
163     name = TableBuilder.escapeIdentifier(name);
164     Table table = null;
165     if(!useExistingTable || ((table = db.getTable(name)) == null)) {
166       List<ColumnBuilder> columns = toColumns(md);
167       table = createUniqueTable(db, name, columns, md, filter);
168     }
169 
170     List<Object[]> rows = new ArrayList<Object[]>(COPY_TABLE_BATCH_SIZE);
171     int numColumns = md.getColumnCount();
172 
173     while (source.next()) {
174       Object[] row = new Object[numColumns];
175       for (int i = 0; i < row.length; i++) {
176         row[i] = source.getObject(i + 1);
177       }
178       row = filter.filterRow(row);
179       if(row == null) {
180         continue;
181       }
182       rows.add(row);
183       if (rows.size() == COPY_TABLE_BATCH_SIZE) {
184         table.addRows(rows);
185         rows.clear();
186       }
187     }
188     if (rows.size() > 0) {
189       table.addRows(rows);
190     }
191 
192     return table.getName();
193   }
194   
195   /**
196    * Copy a delimited text file into a new table in this database.
197    * <p>
198    * Equivalent to:
199    * {@code  importFile(f, name, db, delim, SimpleImportFilter.INSTANCE);}
200    * 
201    * @param name Name of the new table to create
202    * @param f Source file to import
203    * @param delim Regular expression representing the delimiter string.
204    *
205    * @return the name of the imported table
206    *
207    * @see #importFile(File,Database,String,String,ImportFilter)
208    * @see Builder
209    */
210   public static String importFile(File f, Database db, String name,
211                                   String delim)
212     throws IOException
213   {
214     return importFile(f, db, name, delim, SimpleImportFilter.INSTANCE);
215   }
216 
217   /**
218    * Copy a delimited text file into a new table in this database.
219    * <p>
220    * Equivalent to:
221    * {@code  importFile(f, name, db, delim, "'", filter, false);}
222    * 
223    * @param name Name of the new table to create
224    * @param f Source file to import
225    * @param delim Regular expression representing the delimiter string.
226    * @param filter valid import filter
227    *
228    * @return the name of the imported table
229    *
230    * @see #importReader(BufferedReader,Database,String,String,ImportFilter)
231    * @see Builder
232    */
233   public static String importFile(File f, Database db, String name,
234                                   String delim, ImportFilter filter)
235     throws IOException
236   {
237     return importFile(f, db, name, delim, ExportUtil.DEFAULT_QUOTE_CHAR,
238                       filter, false);
239   }
240 
241   /**
242    * Copy a delimited text file into a new table in this database.
243    * <p>
244    * Equivalent to:
245    * {@code  importReader(new BufferedReader(new FileReader(f)), db, name, delim, "'", filter, useExistingTable, true);}
246    * 
247    * @param name Name of the new table to create
248    * @param f Source file to import
249    * @param delim Regular expression representing the delimiter string.
250    * @param quote the quote character
251    * @param filter valid import filter
252    * @param useExistingTable if {@code true} use current table if it already
253    *                         exists, otherwise, create new table with unique
254    *                         name
255    *
256    * @return the name of the imported table
257    *
258    * @see #importReader(BufferedReader,Database,String,String,ImportFilter,boolean)
259    * @see Builder
260    */
261   public static String importFile(File f, Database db, String name, 
262                                   String delim, char quote, 
263                                   ImportFilter filter,
264                                   boolean useExistingTable)
265     throws IOException
266   {
267     return importFile(f, db, name, delim, quote, filter, useExistingTable, true);
268   }
269 
270   /**
271    * Copy a delimited text file into a new table in this database.
272    * <p>
273    * Equivalent to:
274    * {@code  importReader(new BufferedReader(new FileReader(f)), db, name, delim, "'", filter, useExistingTable, header);}
275    * 
276    * @param name Name of the new table to create
277    * @param f Source file to import
278    * @param delim Regular expression representing the delimiter string.
279    * @param quote the quote character
280    * @param filter valid import filter
281    * @param useExistingTable if {@code true} use current table if it already
282    *                         exists, otherwise, create new table with unique
283    *                         name
284    * @param header if {@code false} the first line is not a header row, only
285    *               valid if useExistingTable is {@code true}
286    * @return the name of the imported table
287    *
288    * @see #importReader(BufferedReader,Database,String,String,char,ImportFilter,boolean,boolean)
289    * @see Builder
290    */
291   public static String importFile(File f, Database db, String name, 
292                                   String delim, char quote, 
293                                   ImportFilter filter,
294                                   boolean useExistingTable,
295                                   boolean header)
296     throws IOException
297   {
298     BufferedReader in = null;
299     try {
300       in = new BufferedReader(new FileReader(f));
301       return importReader(in, db, name, delim, quote, filter, 
302                           useExistingTable, header);
303     } finally {
304       ByteUtil.closeQuietly(in);
305     }
306   }
307 
308   /**
309    * Copy a delimited text file into a new table in this database.
310    * <p>
311    * Equivalent to:
312    * {@code  importReader(in, db, name, delim, SimpleImportFilter.INSTANCE);}
313    * 
314    * @param name Name of the new table to create
315    * @param in Source reader to import
316    * @param delim Regular expression representing the delimiter string.
317    *
318    * @return the name of the imported table
319    *
320    * @see #importReader(BufferedReader,Database,String,String,ImportFilter)
321    * @see Builder
322    */
323   public static String importReader(BufferedReader in, Database db, 
324                                     String name, String delim)
325     throws IOException
326   {
327     return importReader(in, db, name, delim, SimpleImportFilter.INSTANCE);
328   }
329   
330   /**
331    * Copy a delimited text file into a new table in this database.
332    * <p>
333    * Equivalent to:
334    * {@code  importReader(in, db, name, delim, filter, false);}
335    * 
336    * @param name Name of the new table to create
337    * @param in Source reader to import
338    * @param delim Regular expression representing the delimiter string.
339    * @param filter valid import filter
340    *
341    * @return the name of the imported table
342    *
343    * @see #importReader(BufferedReader,Database,String,String,ImportFilter,boolean)
344    * @see Builder
345    */
346   public static String importReader(BufferedReader in, Database db, 
347                                     String name, String delim,
348                                     ImportFilter filter)
349     throws IOException
350   {
351     return importReader(in, db, name, delim, filter, false);
352   }
353 
354   /**
355    * Copy a delimited text file into a new (or optionally exixsting) table in
356    * this database.
357    * <p>
358    * Equivalent to:
359    * {@code  importReader(in, db, name, delim, '"', filter, false);}
360    * 
361    * @param name Name of the new table to create
362    * @param in Source reader to import
363    * @param delim Regular expression representing the delimiter string.
364    * @param filter valid import filter
365    * @param useExistingTable if {@code true} use current table if it already
366    *                         exists, otherwise, create new table with unique
367    *                         name
368    *
369    * @return the name of the imported table
370    * 
371    * @see Builder
372    */
373   public static String importReader(BufferedReader in, Database db, 
374                                     String name, String delim,
375                                     ImportFilter filter, 
376                                     boolean useExistingTable)
377     throws IOException
378   {
379     return importReader(in, db, name, delim, ExportUtil.DEFAULT_QUOTE_CHAR,
380                         filter, useExistingTable);
381   }
382 
383   /**
384    * Copy a delimited text file into a new (or optionally exixsting) table in
385    * this database.
386    * <p>
387    * Equivalent to:
388    * {@code  importReader(in, db, name, delim, '"', filter, useExistingTable, true);}
389    * 
390    * @param name Name of the new table to create
391    * @param in Source reader to import
392    * @param delim Regular expression representing the delimiter string.
393    * @param quote the quote character
394    * @param filter valid import filter
395    * @param useExistingTable if {@code true} use current table if it already
396    *                         exists, otherwise, create new table with unique
397    *                         name
398    *
399    * @return the name of the imported table
400    * 
401    * @see Builder
402    */
403   public static String importReader(BufferedReader in, Database db, 
404                                     String name, String delim, char quote,
405                                     ImportFilter filter,
406                                     boolean useExistingTable)
407     throws IOException
408   {
409     return importReader(in, db, name, delim, quote, filter, useExistingTable, 
410                         true);
411   }
412 
413   /**
414    * Copy a delimited text file into a new (or optionally exixsting) table in
415    * this database.
416    * 
417    * @param name Name of the new table to create
418    * @param in Source reader to import
419    * @param delim Regular expression representing the delimiter string.
420    * @param quote the quote character
421    * @param filter valid import filter
422    * @param useExistingTable if {@code true} use current table if it already
423    *                         exists, otherwise, create new table with unique
424    *                         name
425    * @param header if {@code false} the first line is not a header row, only
426    *               valid if useExistingTable is {@code true}
427    *
428    * @return the name of the imported table
429    * 
430    * @see Builder
431    */
432   public static String importReader(BufferedReader in, Database db, 
433                                     String name, String delim, char quote,
434                                     ImportFilter filter,
435                                     boolean useExistingTable, boolean header)
436     throws IOException
437   {
438     String line = in.readLine();
439     if(DatabaseImpl.isBlank(line)) {
440       return null;
441     }
442 
443     Pattern delimPat = Pattern.compile(delim);
444 
445     try {
446       name = TableBuilder.escapeIdentifier(name);
447       Table table = null;
448       if(!useExistingTable || ((table = db.getTable(name)) == null)) {
449 
450         List<ColumnBuilder> columns = new LinkedList<ColumnBuilder>();
451         Object[] columnNames = splitLine(line, delimPat, quote, in, 0);
452       
453         for (int i = 0; i < columnNames.length; i++) {
454           columns.add(new ColumnBuilder((String)columnNames[i], DataType.TEXT)
455                       .escapeName()
456                       .setLength((short)DataType.TEXT.getMaxSize())
457                       .toColumn());
458         }
459 
460         table = createUniqueTable(db, name, columns, null, filter);
461         
462         // the first row was a header row
463         header = true;
464       }
465 
466       List<Object[]> rows = new ArrayList<Object[]>(COPY_TABLE_BATCH_SIZE);
467       int numColumns = table.getColumnCount();
468       
469       if(!header) {
470         // first line is _not_ a header line
471         Object[] data = splitLine(line, delimPat, quote, in, numColumns);
472         data = filter.filterRow(data);
473         if(data != null) {
474           rows.add(data);
475         } 
476       }
477 
478       while ((line = in.readLine()) != null)
479       {
480         Object[] data = splitLine(line, delimPat, quote, in, numColumns);
481         data = filter.filterRow(data);
482         if(data == null) {
483           continue;
484         }
485         rows.add(data);
486         if (rows.size() == COPY_TABLE_BATCH_SIZE) {
487           table.addRows(rows);
488           rows.clear();
489         }
490       }
491       if (rows.size() > 0) {
492         table.addRows(rows);
493       }
494 
495       return table.getName();
496 
497     } catch(SQLException e) {
498       throw (IOException)new IOException(e.getMessage()).initCause(e);
499     }
500   }
501 
502   /**
503    * Splits the given line using the given delimiter pattern and quote
504    * character.  May read additional lines for quotes spanning newlines.
505    */
506   private static Object[] splitLine(String line, Pattern delim, char quote,
507                                     BufferedReader in, int numColumns)
508     throws IOException
509   {
510     List<String> tokens = new ArrayList<String>();
511     StringBuilder sb = new StringBuilder();
512     Matcher m = delim.matcher(line);
513     int idx = 0;
514 
515     while(idx < line.length()) {
516 
517       if(line.charAt(idx) == quote) {
518 
519         // find quoted value
520         sb.setLength(0);
521         ++idx;
522         while(true) {
523 
524           int endIdx = line.indexOf(quote, idx);
525 
526           if(endIdx >= 0) {
527 
528             sb.append(line, idx, endIdx);
529             ++endIdx;
530             if((endIdx < line.length()) && (line.charAt(endIdx) == quote)) {
531 
532               // embedded quote
533               sb.append(quote);
534               // keep searching
535               idx = endIdx + 1;
536 
537             } else {
538               
539               // done
540               idx = endIdx;
541               break;
542             }
543 
544           } else {
545 
546             // line wrap
547             sb.append(line, idx, line.length());
548             sb.append(LINE_SEPARATOR);
549 
550             idx = 0;
551             line = in.readLine();
552             if(line == null) {
553               throw new EOFException("Missing end of quoted value " + sb);
554             }
555           }
556         }
557 
558         tokens.add(sb.toString());
559 
560         // skip next delim
561         idx = (m.find(idx) ? m.end() : line.length());
562 
563       } else if(m.find(idx)) {
564 
565         // next unquoted value
566         tokens.add(line.substring(idx, m.start()));
567         idx = m.end();
568 
569       } else {
570 
571         // trailing token
572         tokens.add(line.substring(idx));
573         idx = line.length();
574       }
575     }
576 
577     return tokens.toArray(new Object[Math.max(tokens.size(), numColumns)]);
578   }
579 
580   /**
581    * Returns a new table with a unique name and the given table definition.
582    */
583   private static Table createUniqueTable(Database db, String name,
584                                          List<ColumnBuilder> columns,
585                                          ResultSetMetaData md, 
586                                          ImportFilter filter)
587     throws IOException, SQLException
588   {
589     // otherwise, find unique name and create new table
590     String baseName = name;
591     int counter = 2;
592     while(db.getTable(name) != null) {
593       name = baseName + (counter++);
594     }
595     
596     return new TableBuilder(name)
597       .addColumns(filter.filterColumns(columns, md))
598       .toTable(db);
599   }
600 
601   /**
602    * Builder which simplifies configuration of an import operation.
603    */
604   public static class Builder
605   {
606     private Database _db;
607     private String _tableName;
608     private String _delim = ExportUtil.DEFAULT_DELIMITER;
609     private char _quote = ExportUtil.DEFAULT_QUOTE_CHAR;
610     private ImportFilter _filter = SimpleImportFilter.INSTANCE;
611     private boolean _useExistingTable;
612     private boolean _header = true;
613 
614     public Builder(Database db) {
615       this(db, null);
616     }
617 
618     public Builder(Database db, String tableName) {
619       _db = db;
620       _tableName = tableName;
621     }
622 
623     public Builder setDatabase(Database db) {
624       _db = db;
625       return this;
626     }
627 
628     public Builder setTableName(String tableName) {
629       _tableName = tableName;
630       return this;
631     }
632 
633     public Builder setDelimiter(String delim) {
634       _delim = delim;
635       return this;
636     }
637 
638     public Builder setQuote(char quote) {
639       _quote = quote;
640       return this;
641     }
642 
643     public Builder setFilter(ImportFilter filter) {
644       _filter = filter;
645       return this;
646     }
647 
648     public Builder setUseExistingTable(boolean useExistingTable) {
649       _useExistingTable = useExistingTable;
650       return this;
651     }
652 
653     public Builder setHeader(boolean header) {
654       _header = header;
655       return this;
656     }
657 
658     /**
659      * @see ImportUtil#importResultSet(ResultSet,Database,String,ImportFilter,boolean)
660      */
661     public String importResultSet(ResultSet source)
662       throws SQLException, IOException
663     {
664       return ImportUtil.importResultSet(source, _db, _tableName, _filter,  
665                                         _useExistingTable);
666     }
667 
668     /**
669      * @see ImportUtil#importFile(File,Database,String,String,char,ImportFilter,boolean,boolean)
670      */
671     public String importFile(File f) throws IOException {
672       return ImportUtil.importFile(f, _db, _tableName, _delim, _quote, _filter,
673                                    _useExistingTable, _header);
674     }
675 
676     /**
677      * @see ImportUtil#importReader(BufferedReader,Database,String,String,char,ImportFilter,boolean,boolean)
678      */
679     public String importReader(BufferedReader reader) throws IOException {
680       return ImportUtil.importReader(reader, _db, _tableName, _delim, _quote, 
681                                      _filter, _useExistingTable, _header);
682     }
683   }
684 
685 }