1 package org.opensync.engine.server.adapter;
2
/***
 * Title: OpenSync
 * Description: This class implements the translation of text files into XML
 * files. The text files may be delimiter-separated value files or
 * fixed-size column files.
 * A descriptor file describes the text file to be translated.
 */
10
11 import org.opensync.engine.server.Log;
12 import org.opensync.engine.server.OpenSyncException;
13 import org.opensync.engine.server.OpenSync;
14 import org.opensync.engine.server.adapter.Translator;
15 import org.opensync.engine.util.*;
16
17 import javax.xml.parsers.*;
18 import org.w3c.dom.*;
19 import org.xml.sax.*;
20
21 import java.io.*;
22
23
24 public class Txt2Xml extends Translator {
25
26 private DescriptorParser descriptorParser;
27 private boolean start;
28
29 private int nbRow = 0;
30
31 private final void error(Document doc, Node root, String errorDesc, String severity, int line) throws SAXException {
32
33 Element elt = doc.createElement(rowElementTag);
34 Attr attr = doc.createAttribute(DESCRIPTION);
35 attr.setValue(errorDesc);
36 elt.setAttributeNode(attr);
37 attr = doc.createAttribute(SEVERITY);
38 elt.setAttributeNode(attr);
39 attr = doc.createAttribute(LINE_NUMBER);
40 elt.setAttributeNode(attr);
41 root.appendChild(elt);
42
43 throw new SAXException(new OpenSyncException(errorDesc));
44 }
45
46 private final void chunkLine(String input, Document doc, Element root) throws SAXException
47 {
48 int outputStop;
49 int fieldStart;
50 int fieldStop;
51 Field currentField;
52 Element row, col;
53
54 row = doc.createElement(rowElementTag);
55
56 for (int i=0; i < fields.size() ; ++i)
57 {
58 currentField = (Field)fields.get(i);
59
60 col = doc.createElement(currentField.name);
61
62 fieldStart = startingOffset + currentField.start - 1;
63 fieldStop = Math.min(startingOffset + currentField.end, input.length());
64 if (trimFields) {
65 StringBuffer work = new StringBuffer(input.substring(fieldStart,fieldStop));
66 int ptr = work.length();
67 while (ptr > 0 && Character.isWhitespace(work.charAt(ptr-1)))
68 --ptr;
69 work.setLength(ptr);
70 col.appendChild(doc.createTextNode(new String(work.toString().toCharArray(), 0, work.length())));
71 } else {
72 col.appendChild(doc.createTextNode(
73 new String( input.substring(fieldStart,fieldStop).toCharArray(),
74 0,
75 fieldStop-fieldStart
76 )));
77
78 }
79
80 row.appendChild(col);
81 root.appendChild(row);
82 }
83 }
84
85 private final boolean matchBack(StringBuffer in, String division) {
86 int len_in = in.length();
87 int len_division = division.length();
88 int i =0;
89 boolean flag = true;
90 while (flag && ++i <= len_division) {
91 flag = (in.charAt(len_in - i) == division.charAt(len_division - i));
92 }
93 return flag;
94 }
95
96 private final boolean getLine(Reader input, StringBuffer in, String division)
97 throws IOException {
98 int c;
99 boolean cont = true;
100 while (cont && (c = input.read()) != -1) {
101 in.append((char) c);
102 if (matchBack(in,division)) {
103 cont = false;
104 in.setLength(in.length() - division.length());
105 }
106 }
107 return !cont;
108 }
109
110 private final boolean dropLine(Reader input, String division)
111 throws IOException {
112 int c;
113 boolean cont = true;
114 StringBuffer char_window = new StringBuffer();
115 while (cont && (c = input.read()) != -1) {
116 char_window.append((char) c);
117 if (matchBack(char_window,division)) {
118 cont = false;
119 char_window.setLength(char_window.length() - division.length());
120 }
121 }
122 return !cont;
123 }
124
125
126 static void writeDocument(Node node, Writer out) throws IOException {
127 int type = node.getNodeType();
128 switch (type) {
129 case Node.ELEMENT_NODE:
130 out.write("<" + node.getNodeName());
131 NamedNodeMap attrs = node.getAttributes();
132 int len = attrs.getLength();
133 for (int i=0; i<len; i++) {
134 Attr attr = (Attr)attrs.item(i);
135 out.write(" " + attr.getNodeName() + "=\"" +
136 escapeXML_Document(attr.getNodeValue()) + "\"");
137 }
138 out.write('>');
139 NodeList children = node.getChildNodes();
140 len = children.getLength();
141 for (int i=0; i<len; i++)
142 writeDocument(children.item(i), out);
143 out.write("</" + node.getNodeName() + ">");
144 break;
145 case Node.ENTITY_REFERENCE_NODE:
146 out.write("&" + node.getNodeName() + ";");
147 break;
148 case Node.CDATA_SECTION_NODE:
149 out.write("<![CDATA[" + node.getNodeValue() + "]]>");
150 break;
151 case Node.TEXT_NODE:
152 out.write(escapeXML_Document(node.getNodeValue()));
153 break;
154 case Node.PROCESSING_INSTRUCTION_NODE:
155 out.write("<?" + node.getNodeName());
156 String data = node.getNodeValue();
157 if (data!=null && data.length()>0)
158 out.write(" " + data);
159 out.write("?>");
160 break;
161 }
162 }
163
164 static String escapeXML_Document(String s) {
165 StringBuffer str = new StringBuffer();
166 int len = (s != null) ? s.length() : 0;
167 for (int i=0; i<len; i++) {
168 char ch = s.charAt(i);
169 switch (ch) {
170 case '<': str.append("<"); break;
171 case '>': str.append(">"); break;
172 case '&': str.append("&"); break;
173 case '"': str.append("""); break;
174 case '\'': str.append("'"); break;
175 default: str.append(ch);
176 }
177 }
178 return str.toString();
179 }
180
181
182 /***
183 * @param input
184 * @param output
185 * @exception SAXException, IOException
186 * @exception IOException
187 * @exception ParserConfigurationException
188 * @exception SAXException
189 */
190 public final void process(
191 Reader input,
192 StringWriter output
193 )
194 throws SAXException, IOException, ParserConfigurationException
195 {
196
197
198 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
199 DocumentBuilder builder = factory.newDocumentBuilder();
200 Document document = builder.newDocument();
201
202
203 String in = "";
204 long nb_lines = this.linesCounter(input);
205
206 input.reset();
207 BufferedReader br = new BufferedReader(input);
208
209 if (skipfirstlines > 0) {
210 int linesToSkip = skipfirstlines;
211 while (linesToSkip > 0) {
212 br.readLine();
213
214 linesToSkip--;
215 }
216 }
217
218 nb_lines = nb_lines - this.skiplastlines;
219
220 Element root = document.createElement(documentElementTag);
221
222 int count = 0;
223 boolean fatal = false;
224 boolean cont = true;
225 while ( (cont && !fatal) && count < nb_lines)
226 {
227 in = "";
228
229 cont = ((in = br.readLine()) != null);
230 if (cont) {
231 if (in.length() < ((Field)(fields.get(fields.size()-1))).end && !offWidthOK) {
232 error(document, root, OFF_WIDTH_ROW,NONFATAL,count);
233 } else if (offWidthOK &&
234 fields.size() > 1 &&
235 in.length() < ((Field)(fields.get(fields.size()-2))).end
236 ) {
237 if (in.length() > 1) {
238 error(document, root, MISSING_LAST_FIELD,NONFATAL,count);
239 }
240 } else {
241 chunkLine(in.toString(),document, root);
242 }
243 } else if (in.length() != 0) {
244 fatal = true;
245 error(document, root, UNTERMINATED_ROW,FATAL,count);
246 }
247 ++count;
248 }
249
250 writeDocument(root,output);
251 output.flush();
252 }
253
/***
 * Creates a translator. The descriptor parser is not created here; it is
 * set up by readDescriptor.
 */
public Txt2Xml() {
    super();
}
256
257 /***
258 * @param filename
259 * @exception IOException
260 * @exception FileNotFoundException
261 */
262 public FileReader openFileForRead(
263 String filename
264 ) throws FileNotFoundException,IOException
265 {
266
267 File file = new File(filename);
268 if (!file.exists()) {
269 throw new FileNotFoundException("File " + filename + " does not exist.");
270 }
271 if (!file.canRead()) {
272 throw new IOException("File " + filename + " is not readable.");
273 }
274 return new FileReader(file);
275 }
276
277 /***
278 * @param filename
279 * @param overwrite
280 * @exception IOException
281 */
282 public FileOutputStream openFileForWrite(
283 String filename,
284 boolean overwrite
285 ) throws IOException
286 {
287 File file = new File(filename);
288 if (file.exists() && !overwrite) {
289 throw new IOException("File " + filename + " already exists.");
290 }
291 if (file.exists() && !file.canWrite()) {
292 throw new IOException("File " + filename + " is not writeable.");
293 }
294 return new FileOutputStream(filename,false);
295 }
296
297 /***
298 * @param descriptorFileName
299 * @exception IOException
300 * @exception FileNotFoundException
301 * @exception SAXException
302 * @exception ParserConfigurationException
303 */
304 public void readDescriptor(String descriptorFileName)
305 throws ParserConfigurationException, SAXException,FileNotFoundException,IOException {
306 descriptorParser = new DescriptorParser(this);
307
308 SAXParserFactory factory = SAXParserFactory.newInstance();
309 factory.setValidating(true);
310 SAXParser parser = factory.newSAXParser();
311 parser.parse( new InputSource(new File(descriptorFileName).toURL().toExternalForm()), descriptorParser);
312 }
313
314 public void startReadInputFile(boolean status) {
315 start = status;
316 }
317
318 /***
319 * @param input
320 * @exception IOException
321 */
322 public String parseCSV(Reader input)
323 throws java.io.IOException
324
325 {
326
327 long nb_lines = this.linesCounter(input);
328
329 input.reset();
330
331 BufferedReader br = new BufferedReader(input);
332
333 if (start && skipfirstlines > 0) {
334 int linesToSkip = skipfirstlines;
335 while (linesToSkip > 0) {
336 br.readLine();
337 linesToSkip--;
338 }
339 }
340
341
342
343 nb_lines = nb_lines - this.skiplastlines;
344
345
346 StreamTokenizer st = new StreamTokenizer(br);
347
348 st.resetSyntax();
349
350
351
352 st.wordChars(' ', ' ');
353
354 st.wordChars('!','@');
355 st.wordChars('A', 'Z');
356
357 st.wordChars(91, 96);
358 st.wordChars('a', 'z');
359
360 st.wordChars(123, 127);
361
362 st.wordChars(128 + 32, 255);
363
364 if (!suppressquotes) {
365
366 st.wordChars('"', '"');
367 st.wordChars('\'', '\'');
368 } else {
369
370 st.quoteChar('"');
371 st.quoteChar('\'');
372 }
373
374
375 st.eolIsSignificant(true);
376
377
378
379
380
381
382 st.ordinaryChar((int)delimiter.charAt(0));
383
384
385
386 StringWriter fw = new StringWriter();
387
388
389
390 fw.write("<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n");
391 fw.write("<DATA>\n");
392
393 boolean tokenWasDelimiter = false;
394 boolean delimiterFound = false;
395 boolean startRow = false;
396 boolean emptyRow = true;
397 int tok;
398
399 tok = st.nextToken();
400 while (st.ttype != StreamTokenizer.TT_EOF && nb_lines > 0)
401 {
402
403 int i = 0;
404 int length = colNames.size();
405 startRow = true;
406 emptyRow = true;
407
408
409 StringBuffer xmlLine = new StringBuffer();
410
411 while (st.ttype != StreamTokenizer.TT_EOL) {
412 if (tok == StreamTokenizer.TT_WORD && st.sval != null) {
413 if (startRow) {
414 xmlLine.append(" <ROW>\n");
415 startRow = false;
416 if (st.sval=="" && length == 1) emptyRow = false;
417 if (st.sval=="" && length > 1) emptyRow = true;
418 }
419 xmlLine.append(" <" + colNames.get(i) + ">");
420 xmlLine.append(checkSpecialCharXml(st.sval.trim()));
421 xmlLine.append("</" +colNames.get(i) + ">\n");
422
423 OpenSync.getInstance().getLog().debug(Log.ROOT, "TT_WORD colNames.get("+i+")="+colNames.get(i)+"="+st.sval.trim());
424 i = (i + 1) % length;
425 tokenWasDelimiter = false;
426 }
427
428
429 else if (tok == '"' || tok == '\'') {
430 OpenSync.getInstance().getLog().debug(Log.ROOT,"Quote found="+tok+" st.sval="+st.sval);
431 if (st.sval != null) {
432 if (startRow) {
433 xmlLine.append(" <ROW>\n");
434 emptyRow = false;
435 startRow = false;
436 }
437
438
439
440 xmlLine.append(" <" + colNames.get(i) + ">");
441 xmlLine.append(checkSpecialCharXml(st.sval.trim()));
442 xmlLine.append("</" +colNames.get(i) + ">\n");
443 System.out.println("\"' colNames.get("+i+")="+colNames.get(i)+"="+st.sval.trim());
444 i = (i + 1) % length;
445 tokenWasDelimiter = false;
446 }
447
448
449
450
451
452
453
454
455
456
457
458 } else if (tok == (int)delimiter.charAt(0) && (tokenWasDelimiter || startRow)) {
459 if (startRow) {
460 xmlLine.append(" <ROW>\n");
461 startRow = false;
462 emptyRow = false;
463 }
464 xmlLine.append(" <" + colNames.get(i) + ">");
465 xmlLine.append("");
466 xmlLine.append("</" +colNames.get(i) + ">\n");
467
468 OpenSync.getInstance().getLog().debug(Log.ROOT, "delimiter colNames.get("+i+")="+colNames.get(i)+"=empty string");
469 i = (i + 1) % length;
470 }else if (tok == (int)delimiter.charAt(0)){
471 emptyRow = false;
472 tokenWasDelimiter = true;
473 } else {
474 tokenWasDelimiter = false;
475 }
476 tok = st.nextToken();
477 startRow = false;
478 }
479 if (!emptyRow) {
480 fw.write(xmlLine.toString());
481
482 fw.write(" </ROW>\n");
483 nbRow ++;
484 }
485 nb_lines--;
486 tok = st.nextToken();
487 }
488
489
490 fw.write("</DATA>\n");
491 fw.flush();
492 fw.close();
493
494
495
496
497
498
499
500 return fw.toString();
501 }
502
503 /***
504 * @param in
505 * @exception SAXException
506 * @exception IOException
507 * @exception FileNotFoundException
508 * @exception ParserConfigurationException
509 */
510 public String parseFixed(Reader in)
511 throws java.io.FileNotFoundException,
512 java.io.IOException, SAXException, ParserConfigurationException
513 {
514
515 StringWriter out = new StringWriter();
516 process((Reader) in, out);
517 return out.toString();
518 }
519 /***
520 * @param txt
521 * @exception SAXException
522 * @exception IOException
523 * @exception ParserConfigurationException
524 */
525 public String parseTxt(String txt)
526 throws java.io.IOException, SAXException, ParserConfigurationException
527 {
528
529 StringReader input = new StringReader(txt);
530
531
532 if (type.equals("delimited"))
533 return parseCSV(input);
534 else if (type.equals("fixed"))
535 return parseFixed(input);
536 return "XMLFile";
537 }
538
539 /***
540 * @param argv
541 * @exception SAXException
542 * @exception IOException
543 * @exception ParserConfigurationException
544 */
545 public static void main(String argv[])
546 throws java.io.IOException, SAXException, ParserConfigurationException
547 {
548 FileReader input = null;
549
550 if (argv.length == 3)
551 {
552 Txt2Xml cp = new Txt2Xml();
553 try {
554 cp.readDescriptor(argv[0]);
555 } catch (Exception e) {
556 System.err.println("Exception caught: " + e.getMessage());
557 }
558
559 try {
560 input = new FileReader(argv[1]);
561 if (cp.type.equals("delimited"))
562 cp.saveString(argv[2], cp.parseCSV(input));
563 else if (cp.type.equals("fixed"))
564 cp.saveString(argv[2], cp.parseFixed(input));
565
566 }
567 catch (IOException e) {
568 System.err.println("Exception caught: " + e.getMessage());
569 }
570 finally {
571 if (input != null) {
572 input.close();
573 input = null;
574 }
575 }
576 } else {
577 System.out.println("\nUsage: java Txt2Xml configFile.xml csv-file xml-file");
578 System.out.println(" where csv-file is the comma-separated file, and ");
579 System.out.println(" xml-file is the XML file to be generated.");
580 }
581 }
582
583 /***
584 * @param filename
585 * @param string
586 * @exception IOException
587 */
588 public void saveString(String filename, String string) throws IOException {
589 FileHelper.stringToFile(string,filename);
590 }
591
592 public int getNbRow() {
593 return nbRow;
594 }
595 }