View Javadoc

1   package org.opensync.engine.server.adapter;
2   
3   /***
4    * Title:         OpenSync
5    * Description :  This class implements the translation of text files into XML
6    *                files. The text files may be tag separated values files or
7    *                fixed-size column files.
8    *                A descriptor file describes the text file to be translated.
9    */
10  
11  import org.opensync.engine.server.Log;
12  import org.opensync.engine.server.OpenSyncException;
13  import org.opensync.engine.server.OpenSync;
14  import org.opensync.engine.server.adapter.Translator;
15  import org.opensync.engine.util.*;
16  
17  import javax.xml.parsers.*;
18  import org.w3c.dom.*;
19  import org.xml.sax.*;
20  
21  import java.io.*;
22  
23  
24  public class Txt2Xml extends Translator {
25  
26    private DescriptorParser descriptorParser;
27    private boolean start;
28  
29    private int nbRow = 0;
30  
31    private final void error(Document doc, Node root, String errorDesc, String severity, int line) throws SAXException {
32  
33      Element elt = doc.createElement(rowElementTag);
34      Attr attr = doc.createAttribute(DESCRIPTION);
35      attr.setValue(errorDesc);
36      elt.setAttributeNode(attr);
37      attr = doc.createAttribute(SEVERITY);
38      elt.setAttributeNode(attr);
39      attr = doc.createAttribute(LINE_NUMBER);
40      elt.setAttributeNode(attr);
41      root.appendChild(elt);
42  
43      throw new SAXException(new OpenSyncException(errorDesc));
44    }
45  
46    private final void chunkLine(String input, Document doc, Element root) throws SAXException
47    {
48      int outputStop;
49      int fieldStart;
50      int fieldStop;
51      Field currentField;
52      Element row, col;
53  
54      row = doc.createElement(rowElementTag);
55  
56      for (int i=0; i < fields.size() ; ++i)
57      {
58        currentField = (Field)fields.get(i);
59  
60        col = doc.createElement(currentField.name);
61  
62        fieldStart = startingOffset + currentField.start - 1;
63        fieldStop = Math.min(startingOffset + currentField.end, input.length());
64        if (trimFields) {
65          StringBuffer work = new StringBuffer(input.substring(fieldStart,fieldStop));
66          int ptr = work.length();
67    while (ptr > 0 && Character.isWhitespace(work.charAt(ptr-1)))
68      --ptr;
69    work.setLength(ptr);
70          col.appendChild(doc.createTextNode(new String(work.toString().toCharArray(), 0, work.length())));
71        } else {
72          col.appendChild(doc.createTextNode(
73            new String( input.substring(fieldStart,fieldStop).toCharArray(),
74                        0,
75                        fieldStop-fieldStart
76                      )));
77          //System.out.println(input.substring(fieldStart,fieldStop).toCharArray());
78        }
79  
80        row.appendChild(col);
81        root.appendChild(row);
82      }
83    }
84  
85    private final boolean matchBack(StringBuffer in, String division) {
86      int len_in = in.length();
87      int len_division = division.length();
88      int i =0;
89      boolean flag = true;
90      while (flag && ++i <= len_division) {
91        flag = (in.charAt(len_in - i) == division.charAt(len_division - i));
92      }
93      return flag;
94    }
95  
96    private final boolean getLine(Reader input, StringBuffer in, String division)
97     throws IOException {
98      int c;
99      boolean cont = true;
100     while (cont && (c = input.read()) != -1) {
101       in.append((char) c);
102       if (matchBack(in,division)) {
103         cont = false;
104         in.setLength(in.length() - division.length());
105       }
106     }
107     return !cont;
108   }
109 
110   private final boolean dropLine(Reader input, String division)
111    throws IOException {
112     int c;
113     boolean cont = true;
114     StringBuffer char_window = new StringBuffer();
115     while (cont && (c = input.read()) != -1) {
116       char_window.append((char) c);
117       if (matchBack(char_window,division)) {
118         cont = false;
119         char_window.setLength(char_window.length() - division.length());
120       }
121     }
122     return !cont;
123   }
124 
125 
126   static void writeDocument(Node node, Writer out) throws IOException {
127     int type = node.getNodeType();
128     switch (type) {
129       case Node.ELEMENT_NODE:
130         out.write("<" + node.getNodeName());
131         NamedNodeMap attrs = node.getAttributes();
132         int len = attrs.getLength();
133         for (int i=0; i<len; i++) {
134             Attr attr = (Attr)attrs.item(i);
135             out.write(" " + attr.getNodeName() + "=\"" +
136                       escapeXML_Document(attr.getNodeValue()) + "\"");
137         }
138         out.write('>');
139         NodeList children = node.getChildNodes();
140         len = children.getLength();
141         for (int i=0; i<len; i++)
142           writeDocument(children.item(i), out);
143         out.write("</" + node.getNodeName() + ">");
144         break;
145       case Node.ENTITY_REFERENCE_NODE:
146         out.write("&" + node.getNodeName() + ";");
147         break;
148       case Node.CDATA_SECTION_NODE:
149         out.write("<![CDATA[" + node.getNodeValue() + "]]>");
150         break;
151       case Node.TEXT_NODE:
152         out.write(escapeXML_Document(node.getNodeValue()));
153         break;
154       case Node.PROCESSING_INSTRUCTION_NODE:
155         out.write("<?" + node.getNodeName());
156         String data = node.getNodeValue();
157         if (data!=null && data.length()>0)
158            out.write(" " + data);
159         out.write("?>");
160         break;
161     }
162   }
163 
164   static String escapeXML_Document(String s) {
165     StringBuffer str = new StringBuffer();
166     int len = (s != null) ? s.length() : 0;
167     for (int i=0; i<len; i++) {
168        char ch = s.charAt(i);
169        switch (ch) {
170        case '<': str.append("&lt;"); break;
171        case '>': str.append("&gt;"); break;
172        case '&': str.append("&amp;"); break;
173        case '"': str.append("&quot;"); break;
174        case '\'': str.append("&apos;"); break;
175        default: str.append(ch);
176      }
177     }
178     return str.toString();
179   }
180 
181 
182   /***
183    * @param input
184    * @param output
185    * @exception SAXException, IOException
186    * @exception IOException
187    * @exception ParserConfigurationException
188    * @exception SAXException
189    */
190   public final void process(
191      Reader input,
192      StringWriter output
193   )
194      throws SAXException, IOException, ParserConfigurationException
195   {
196 
197 
198       DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
199       DocumentBuilder builder = factory.newDocumentBuilder();
200       Document    document = builder.newDocument();
201 
202       //StringBuffer in = new StringBuffer(2*((Field)fields.get(fields.size() - 1)).end);
203       String in = "";
204      long nb_lines = this.linesCounter(input);
205 
206       input.reset();
207       BufferedReader br = new BufferedReader(input);
208 
209       if (skipfirstlines > 0) {
210 	int linesToSkip = skipfirstlines;
211 	while (linesToSkip > 0) {
212 	  br.readLine();
213 	  //dropLine(input, lineBreakMarker);
214 	  linesToSkip--;
215 	}
216       }
217 
218       nb_lines = nb_lines - this.skiplastlines;
219 
220       Element root = document.createElement(documentElementTag);
221 
222       int count = 0;
223       boolean fatal = false;
224       boolean cont = true;
225       while ( (cont && !fatal) &&  count < nb_lines)
226       {
227         in = "";
228         //cont = getLine(input,in,lineBreakMarker);
229 	cont = ((in = br.readLine()) != null);
230         if (cont) {
231           if (in.length() < ((Field)(fields.get(fields.size()-1))).end && !offWidthOK) {
232             error(document, root, OFF_WIDTH_ROW,NONFATAL,count);
233           } else if (offWidthOK &&
234                       fields.size() > 1 &&
235                       in.length() < ((Field)(fields.get(fields.size()-2))).end
236                     ) {
237                         if (in.length() > 1) {
238                           error(document, root, MISSING_LAST_FIELD,NONFATAL,count);
239                     }
240           } else {
241             chunkLine(in.toString(),document, root);
242           }
243         } else if (in.length() != 0) {
244           fatal = true;
245           error(document, root, UNTERMINATED_ROW,FATAL,count);
246         }
247         ++count;
248       }
249 
250       writeDocument(root,output);
251       output.flush();
252   }
253 
254   /****/
255   public Txt2Xml() {}
256 
257   /***
258    * @param filename
259    * @exception IOException
260    * @exception FileNotFoundException
261    */
262   public FileReader openFileForRead(
263      String filename
264    ) throws FileNotFoundException,IOException
265   {
266 
267     File file = new File(filename);
268     if (!file.exists()) {
269       throw new FileNotFoundException("File " + filename + " does not exist.");
270     }
271     if (!file.canRead()) {
272       throw new IOException("File " + filename + " is not readable.");
273     }
274     return new FileReader(file);
275   }
276 
277   /***
278    * @param filename
279    * @param overwrite
280    * @exception IOException
281    */
282   public FileOutputStream openFileForWrite(
283      String filename,
284      boolean overwrite
285    ) throws IOException
286   {
287     File file = new File(filename);
288     if (file.exists() && !overwrite) {
289       throw new IOException("File " + filename + " already exists.");
290     }
291     if (file.exists() && !file.canWrite()) {
292       throw new IOException("File " + filename + " is not writeable.");
293     }
294     return new FileOutputStream(filename,false);
295   }
296 
297   /***
298    * @param descriptorFileName
299    * @exception IOException
300    * @exception FileNotFoundException
301    * @exception SAXException
302    * @exception ParserConfigurationException
303    */
304   public void readDescriptor(String descriptorFileName)
305   throws ParserConfigurationException, SAXException,FileNotFoundException,IOException {
306     descriptorParser = new DescriptorParser(this);
307 
308     SAXParserFactory factory = SAXParserFactory.newInstance();
309     factory.setValidating(true);
310     SAXParser parser = factory.newSAXParser();
311     parser.parse( new InputSource(new File(descriptorFileName).toURL().toExternalForm()), descriptorParser);
312   }
313 
314   public void startReadInputFile(boolean status) {
315     start = status;
316   }
317 
318   /***
319    * @param input
320    * @exception IOException
321    */
322   public String parseCSV(Reader input)
323    throws java.io.IOException
324 
325   {
326 
327     long nb_lines = this.linesCounter(input);
328 
329     input.reset();
330 
331     BufferedReader br = new BufferedReader(input);
332 
333     if (start && skipfirstlines > 0) {
334       int linesToSkip = skipfirstlines;
335       while (linesToSkip > 0) {
336         br.readLine();
337         linesToSkip--;
338       }
339     }
340 
341     //br = new BufferedReader(this.removeEmptyLines(input));
342 
343     nb_lines = nb_lines - this.skiplastlines;
344 
345 
346     StreamTokenizer st = new StreamTokenizer(br);
347 
348     st.resetSyntax();
349 
350     // Support of the Latin 1 Characters (cf. http://www.pemberley.com/janeinfo/latin1tb.html).
351 
352     st.wordChars(' ', ' ');
353     // for characters from ! to @ which includes '0' to '9'
354     st.wordChars('!','@');
355     st.wordChars('A', 'Z');
356     // for characters: [ \ ] ^ _ `
357     st.wordChars(91, 96);
358     st.wordChars('a', 'z');
359     // for characters: { | } ~
360     st.wordChars(123, 127);
361     // To avoid characters from 128 to 160: no meaning in Unicode
362     st.wordChars(128 + 32, 255);
363 
364     if (!suppressquotes) {
365       // keep quotes in the output
366       st.wordChars('"', '"');
367       st.wordChars('\'', '\'');
368     } else {
369       // double or simple quotes eliminated in the output.
370       st.quoteChar('"');
371       st.quoteChar('\'');
372     }
373 
374     // We want to read the file one line at a time, so end-ofline matters
375     st.eolIsSignificant(true);
376     // The delimiter between fields is a comma, not a space
377     //st.whitespaceChars((int)delimiter.charAt(0), (int)delimiter.charAt(0));
378     // In order to compute the special case:
379     // a||b|c where | is the current delimiter.
380     // The delimiter will be return has a token.
381 
382     st.ordinaryChar((int)delimiter.charAt(0));
383     //System.out.println("delimiter="+delimiter+" (ascii character "+(int)delimiter.charAt(0)+")");
384 
385     //FileWriter fw = new FileWriter(outputFile);
386     StringWriter fw = new StringWriter();
387 
388     // Write the XML declaration and the root element
389     //fw.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
390     fw.write("<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n");
391     fw.write("<DATA>\n");
392 
393     boolean tokenWasDelimiter = false;
394     boolean delimiterFound = false;
395     boolean startRow = false;
396     boolean emptyRow = true;
397     int tok;
398     // Get the first token, then check its type
399     tok = st.nextToken();
400     while (st.ttype != StreamTokenizer.TT_EOF && nb_lines > 0)
401     {
402       // We're not at EOF, so start a row
403       int i = 0;
404       int length = colNames.size();
405       startRow = true;
406       emptyRow = true;
407 
408       // Handling of one line
409       StringBuffer xmlLine = new StringBuffer();
410 
411       while (st.ttype != StreamTokenizer.TT_EOL) {
412         if (tok == StreamTokenizer.TT_WORD && st.sval != null) {
413           if (startRow) {
414             xmlLine.append("  <ROW>\n");
415             startRow = false;
416             if (st.sval=="" && length == 1) emptyRow = false;
417 	    if (st.sval=="" && length > 1) emptyRow = true;
418           }
419           xmlLine.append("    <" + colNames.get(i) + ">");
420           xmlLine.append(checkSpecialCharXml(st.sval.trim()));
421           xmlLine.append("</" +colNames.get(i) + ">\n");
422           //System.out.println("TT_WORD colNames.get("+i+")="+colNames.get(i)+"="+st.sval.trim());
423 	  OpenSync.getInstance().getLog().debug(Log.ROOT, "TT_WORD colNames.get("+i+")="+colNames.get(i)+"="+st.sval.trim());
424           i = (i + 1) % length;
425           tokenWasDelimiter = false;
426         }
427         // tok may equal " or ' when st.quoteChar('"'); st.quoteChar('\'');
428         // are used (it means suppressquotes equals yes!)
429         else if (tok == '"' || tok == '\'') {
430           OpenSync.getInstance().getLog().debug(Log.ROOT,"Quote found="+tok+" st.sval="+st.sval);
431           if (st.sval != null) {
432             if (startRow) {
433               xmlLine.append("  <ROW>\n");
434               emptyRow = false;
435               startRow = false;
436             }
437             // Bug fix notes - when using st.quoteChar('"'); st.quoteChar('\'');
438             // the quotes are ignored and you can just use st.sval.
439             // You don't have to get the next token!
440             xmlLine.append("    <" + colNames.get(i) + ">");
441             xmlLine.append(checkSpecialCharXml(st.sval.trim()));
442             xmlLine.append("</" +colNames.get(i) + ">\n");
443             System.out.println("\"' colNames.get("+i+")="+colNames.get(i)+"="+st.sval.trim());
444             i = (i + 1) % length;
445             tokenWasDelimiter = false;
446           }
447           //  i = (i + 1) % length;
448           // Get the word between the quotes
449           //tok = st.nextToken();
450           //System.out.println("Next token after quote="+tok);
451           //if (tok == StreamTokenizer.TT_WORD && st.sval != null) {
452           //  fw.write("    <" + colNames.get(i) + ">");
453           //  fw.write(checkSpecialCharXml(st.sval.trim()));
454           //  fw.write("</" +colNames.get(i) + ">\n");
455           //  System.out.println("\"' colNames.get("+i+")="+colNames.get(i)+"="+st.sval.trim());
456           //  i = (i + 1) % length;
457           //}
458         } else if (tok == (int)delimiter.charAt(0) && (tokenWasDelimiter || startRow)) {
459           if (startRow) {
460             xmlLine.append("  <ROW>\n");
461             startRow = false;
462             emptyRow = false;
463           }
464           xmlLine.append("    <" + colNames.get(i) + ">");
465           xmlLine.append("");
466           xmlLine.append("</" +colNames.get(i) + ">\n");
467           //System.out.println("delimiter colNames.get("+i+")="+colNames.get(i)+"=empty string");
468 	  OpenSync.getInstance().getLog().debug(Log.ROOT, "delimiter colNames.get("+i+")="+colNames.get(i)+"=empty string");
469           i = (i + 1) % length;
470         }else if (tok == (int)delimiter.charAt(0)){
471 	  emptyRow = false;
472           tokenWasDelimiter = true;
473         } else {
474 	  tokenWasDelimiter = false;
475 	}
476         tok = st.nextToken();
477         startRow = false;
478       }
479       if (!emptyRow) {
480 	fw.write(xmlLine.toString());
481 	// We've hit either the end of the line or the end of the file, so close the row.
482 	fw.write("  </ROW>\n");
483   nbRow ++;
484       }
485       nb_lines--;
486       tok = st.nextToken();
487     }
488     // Now we're at the end of the file, so close the XML Data,
489     // flush the buffer to disk, and close the newly-created file.
490     fw.write("</DATA>\n");
491     fw.flush();
492     fw.close();
493 
494  //   String s = fw.toString();
495  /*   for(int p=0; p < s.length(); p++) {
496       char c = s.charAt(p);
497       if(c != '>' && c != '<' && c!= '//' && !Character.isLetter(c) && !Character.isDigit(c)) System.out.print(c);
498     }
499     System.out.println(s);*/
500     return fw.toString();
501   }
502 
503   /***
504    * @param in
505    * @exception SAXException
506    * @exception IOException
507    * @exception FileNotFoundException
508    * @exception ParserConfigurationException
509    */
510   public String parseFixed(Reader in)
511     throws java.io.FileNotFoundException,
512            java.io.IOException, SAXException, ParserConfigurationException
513   {
514       //FileReader in = openFileForRead(inputFile);
515       StringWriter out = new StringWriter();
516       process((Reader) in, out);
517       return out.toString();
518   }
519   /***
520    * @param txt
521    * @exception SAXException
522    * @exception IOException
523    * @exception ParserConfigurationException
524    */
525   public String parseTxt(String txt)
526     throws java.io.IOException, SAXException, ParserConfigurationException
527   {
528 
529     StringReader input = new StringReader(txt);
530 
531 
532     if (type.equals("delimited"))
533       return parseCSV(input);
534     else if (type.equals("fixed"))
535       return parseFixed(input);
536     return "XMLFile";
537   }
538 
539    /***
540     * @param  argv
541     * @exception  SAXException
542     * @exception  IOException
543     * @exception  ParserConfigurationException
544     */
545    public static void main(String argv[])
546     throws java.io.IOException, SAXException, ParserConfigurationException
547   {
548     FileReader input = null;
549 
550     if (argv.length == 3)
551     {
552       Txt2Xml cp = new Txt2Xml();
553       try {
554         cp.readDescriptor(argv[0]);
555       } catch (Exception e) {
556         System.err.println("Exception caught: " + e.getMessage());
557       }
558 
559       try {
560         input = new FileReader(argv[1]);
561         if (cp.type.equals("delimited"))
562           cp.saveString(argv[2], cp.parseCSV(input));
563         else if (cp.type.equals("fixed"))
564           cp.saveString(argv[2], cp.parseFixed(input));
565 
566       }
567       catch (IOException e) {
568         System.err.println("Exception caught: " + e.getMessage());
569       }
570       finally {
571         if (input != null) {
572           input.close();
573           input = null;
574         }
575       }
576     } else {
577           System.out.println("\nUsage: java Txt2Xml configFile.xml csv-file xml-file");
578           System.out.println("       where csv-file is the comma-separated file, and ");
579           System.out.println("       xml-file is the XML file to be generated.");
580     }
581   }
582 
583     /***
584    * @param filename
585    * @param string
586    * @exception IOException
587    */
588   public void saveString(String filename, String string) throws IOException {
589     FileHelper.stringToFile(string,filename);
590   }
591 
592   public int getNbRow() {
593     return nbRow;
594   }
595 }