0516a9232f3c8065d475c3a52a9dc771785accb3
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / specimen / abcd206 / in / AbcdParseUtility.java
1 // $Id$
2 /**
3 * Copyright (C) 2015 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.io.specimen.abcd206.in;
11
12 import java.io.InputStream;
13 import java.net.URI;
14 import java.util.Date;
15
16 import javax.xml.parsers.DocumentBuilder;
17 import javax.xml.parsers.DocumentBuilderFactory;
18
19 import org.apache.log4j.Logger;
20 import org.joda.time.DateTime;
21 import org.w3c.dom.Document;
22 import org.w3c.dom.Element;
23 import org.w3c.dom.Node;
24 import org.w3c.dom.NodeList;
25
26 /**
27 * @author pplitzner
28 * @date 16.06.2015
29 *
30 */
31 public class AbcdParseUtility {
32
33 private static final Logger logger = Logger.getLogger(AbcdParseUtility.class);
34
35
36 public static URI parseFirstUri(NodeList nodeList, Abcd206ImportReport report){
37 URI uri = null;
38 String textContent = parseFirstTextContent(nodeList);
39 if(textContent!=null){
40 try {
41 uri = URI.create(textContent);
42 } catch (IllegalArgumentException e) {
43 if(report!=null){
44 report.addException("Exception during URI parsing!", e);
45 }
46 }
47 }
48 return uri;
49 }
50
51 public static String parseFirstTextContent(NodeList nodeList){
52 return parseFirstTextContent(nodeList, true);
53 }
54
55 public static String parseFirstTextContent(NodeList nodeList, boolean cleanUpWhiteSpaces){
56 String string = null;
57 if(nodeList.getLength()>0){
58 string = nodeList.item(0).getTextContent();
59 if(cleanUpWhiteSpaces){
60 string = string.replace("\n", "").replaceAll("( )+", " ").trim();
61 }
62 }
63 return string;
64 }
65
66 public static Double parseFirstDouble(NodeList nodeList, Abcd206ImportReport report){
67 if(nodeList.getLength()>0){
68 return parseDouble(nodeList.item(0), report);
69 }
70 return null;
71 }
72
73 public static Double parseDouble(Node node, Abcd206ImportReport report){
74 String message = "Could not parse double value for node " + node.getNodeName();
75 Double doubleValue = null;
76 try{
77 String textContent = node.getTextContent();
78 //remove 1000 dots
79 textContent = textContent.replace(".","");
80 //convert commmas
81 textContent = textContent.replace(",",".");
82 doubleValue = Double.parseDouble(textContent);
83 } catch (NullPointerException npe){
84 logger.error(message, npe);
85 if(report!=null){
86 report.addException(message, npe);
87 }
88 } catch (NumberFormatException nfe){
89 logger.error(message, nfe);
90 if(report!=null){
91 report.addException(message, nfe);
92 }
93 }
94 return doubleValue;
95 }
96
97 public static DateTime parseFirstDateTime(NodeList nodeList) {
98 DateTime dateTime = null;
99 String textContent = parseFirstTextContent(nodeList);
100 if(textContent!=null){
101 dateTime = DateTime.parse(textContent);
102 }
103 return dateTime;
104 }
105
106 public static Date parseFirstDate(NodeList nodeList) {
107 Date date = null;
108 DateTime dateTime = parseFirstDateTime(nodeList);
109 if(dateTime!=null){
110 date = dateTime.toDate();
111 }
112 return date;
113 }
114
115 /**
116 * Return the wrapper with the list of root nodes for an ABCD XML file
117 * @param fileName: the file's location
118 * @return a wrapper with a list of root nodes ("Unit")
119 */
120 public static UnitAssociationWrapper parseUnitsNodeList(InputStream inputStream, Abcd206ImportReport report) {
121 UnitAssociationWrapper unitAssociationWrapper = new UnitAssociationWrapper();
122 NodeList unitList = null;
123 try {
124 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
125 DocumentBuilder builder = factory.newDocumentBuilder();
126
127 Document document = builder.parse(inputStream);
128 Element root = document.getDocumentElement();
129 unitList = root.getElementsByTagName("Unit");
130 if (unitList.getLength()>0) {
131 unitAssociationWrapper.setPrefix("");
132 unitAssociationWrapper.setAssociatedUnits(unitList);
133 return unitAssociationWrapper;
134 }
135 unitList = root.getElementsByTagName("abcd:Unit");
136 if (unitList.getLength()>0) {
137 unitAssociationWrapper.setPrefix("abcd:");
138 unitAssociationWrapper.setAssociatedUnits(unitList);
139 return unitAssociationWrapper;
140 }
141 unitList = root.getElementsByTagName("abcd21:Unit");
142 if (unitList.getLength()>0) {
143 unitAssociationWrapper.setPrefix("abcd21:");
144 unitAssociationWrapper.setAssociatedUnits(unitList);
145 }
146 } catch (Exception e) {
147 logger.warn(e);
148 if(report!=null){
149 report.addException("Exception during parsing of nodeList!", e);
150 }
151 }
152 return unitAssociationWrapper;
153 }
154
155 }