/**
 * Copyright (C) 2015 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
10 package eu
.etaxonomy
.cdm
.io
.specimen
.abcd206
.in
;
import java.io.InputStream;
import java.net.URI;
import java.util.Date;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.apache.log4j.Logger;
import org.joda.time.DateTime;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
31 public class AbcdParseUtility
{
33 private static final Logger logger
= Logger
.getLogger(AbcdParseUtility
.class);
36 public static URI
parseFirstUri(NodeList nodeList
, Abcd206ImportReport report
){
38 String textContent
= parseFirstTextContent(nodeList
);
39 if(textContent
!=null){
41 uri
= URI
.create(textContent
);
42 } catch (IllegalArgumentException e
) {
44 report
.addException("Exception during URI parsing!", e
);
51 public static String
parseFirstTextContent(NodeList nodeList
){
52 return parseFirstTextContent(nodeList
, true);
55 public static String
parseFirstTextContent(NodeList nodeList
, boolean cleanUpWhiteSpaces
){
57 if(nodeList
.getLength()>0){
58 string
= nodeList
.item(0).getTextContent();
59 if(cleanUpWhiteSpaces
){
60 string
= string
.replace("\n", "").replaceAll("( )+", " ").trim();
66 public static Double
parseFirstDouble(NodeList nodeList
, Abcd206ImportReport report
){
67 if(nodeList
.getLength()>0){
68 return parseDouble(nodeList
.item(0), report
);
73 public static Double
parseDouble(Node node
, Abcd206ImportReport report
){
74 String message
= "Could not parse double value for node " + node
.getNodeName();
75 Double doubleValue
= null;
77 String textContent
= node
.getTextContent();
79 textContent
= textContent
.replace(".","");
81 textContent
= textContent
.replace(",",".");
82 doubleValue
= Double
.parseDouble(textContent
);
83 } catch (NullPointerException npe
){
84 logger
.error(message
, npe
);
86 report
.addException(message
, npe
);
88 } catch (NumberFormatException nfe
){
89 logger
.error(message
, nfe
);
91 report
.addException(message
, nfe
);
97 public static DateTime
parseFirstDateTime(NodeList nodeList
) {
98 DateTime dateTime
= null;
99 String textContent
= parseFirstTextContent(nodeList
);
100 if(textContent
!=null){
101 dateTime
= DateTime
.parse(textContent
);
106 public static Date
parseFirstDate(NodeList nodeList
) {
108 DateTime dateTime
= parseFirstDateTime(nodeList
);
110 date
= dateTime
.toDate();
116 * Return the wrapper with the list of root nodes for an ABCD XML file
117 * @param fileName: the file's location
118 * @return a wrapper with a list of root nodes ("Unit")
120 public static UnitAssociationWrapper
parseUnitsNodeList(InputStream inputStream
, Abcd206ImportReport report
) {
121 UnitAssociationWrapper unitAssociationWrapper
= new UnitAssociationWrapper();
122 NodeList unitList
= null;
124 DocumentBuilderFactory factory
= DocumentBuilderFactory
.newInstance();
125 DocumentBuilder builder
= factory
.newDocumentBuilder();
127 Document document
= builder
.parse(inputStream
);
128 Element root
= document
.getDocumentElement();
129 unitList
= root
.getElementsByTagName("Unit");
130 if (unitList
.getLength()>0) {
131 unitAssociationWrapper
.setPrefix("");
132 unitAssociationWrapper
.setAssociatedUnits(unitList
);
133 return unitAssociationWrapper
;
135 unitList
= root
.getElementsByTagName("abcd:Unit");
136 if (unitList
.getLength()>0) {
137 unitAssociationWrapper
.setPrefix("abcd:");
138 unitAssociationWrapper
.setAssociatedUnits(unitList
);
139 return unitAssociationWrapper
;
141 unitList
= root
.getElementsByTagName("abcd21:Unit");
142 if (unitList
.getLength()>0) {
143 unitAssociationWrapper
.setPrefix("abcd21:");
144 unitAssociationWrapper
.setAssociatedUnits(unitList
);
146 } catch (Exception e
) {
149 report
.addException("Exception during parsing of nodeList!", e
);
152 return unitAssociationWrapper
;