Revision c95e3daf
Added by Patrick Plitzner almost 9 years ago
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/specimen/abcd206/in/Abcd206Import.java | ||
---|---|---|
9 | 9 |
|
10 | 10 |
package eu.etaxonomy.cdm.io.specimen.abcd206.in; |
11 | 11 |
|
12 |
import java.io.InputStream; |
|
13 | 12 |
import java.net.MalformedURLException; |
14 | 13 |
import java.util.ArrayList; |
15 | 14 |
import java.util.HashMap; |
... | ... | |
19 | 18 |
import java.util.Set; |
20 | 19 |
import java.util.UUID; |
21 | 20 |
|
22 |
import javax.xml.parsers.DocumentBuilder; |
|
23 |
import javax.xml.parsers.DocumentBuilderFactory; |
|
24 |
|
|
25 | 21 |
import org.apache.commons.lang.StringUtils; |
26 | 22 |
import org.apache.log4j.Logger; |
27 | 23 |
import org.springframework.stereotype.Component; |
28 |
import org.w3c.dom.Document; |
|
29 | 24 |
import org.w3c.dom.Element; |
30 | 25 |
import org.w3c.dom.NodeList; |
31 | 26 |
|
... | ... | |
214 | 209 |
} |
215 | 210 |
} |
216 | 211 |
|
217 |
InputStream source = state.getConfig().getSource();
|
|
218 |
NodeList unitsList = getUnitsNodeList(source);
|
|
212 |
NodeList unitsList = AbcdParseUtility.getUnitsNodeList(state);
|
|
213 |
prefix = AbcdParseUtility.getPrefix(state);
|
|
219 | 214 |
|
220 | 215 |
if (unitsList != null) { |
221 | 216 |
String message = "nb units to insert: " + unitsList.getLength(); |
... | ... | |
299 | 294 |
return; |
300 | 295 |
} |
301 | 296 |
|
302 |
/** |
|
303 |
* Return the list of root nodes for an ABCD 2.06 XML file |
|
304 |
* @param fileName: the file's location |
|
305 |
* @return the list of root nodes ("Unit") |
|
306 |
*/ |
|
307 |
protected NodeList getUnitsNodeList(InputStream inputStream) { |
|
308 |
NodeList unitList = null; |
|
309 |
try { |
|
310 |
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); |
|
311 |
DocumentBuilder builder = factory.newDocumentBuilder(); |
|
312 |
|
|
313 |
Document document = builder.parse(inputStream); |
|
314 |
Element root = document.getDocumentElement(); |
|
315 |
unitList = root.getElementsByTagName("Unit"); |
|
316 |
if (unitList.getLength() == 0) { |
|
317 |
unitList = root.getElementsByTagName("abcd:Unit"); |
|
318 |
prefix = "abcd:"; |
|
319 |
} |
|
320 |
if (unitList.getLength() == 0) { |
|
321 |
unitList = root.getElementsByTagName("abcd21:Unit"); |
|
322 |
prefix = "abcd21:"; |
|
323 |
} |
|
324 |
} catch (Exception e) { |
|
325 |
logger.warn(e); |
|
326 |
} |
|
327 |
return unitList; |
|
328 |
} |
|
329 |
|
|
330 | 297 |
/** |
331 | 298 |
* Handle a single unit |
332 | 299 |
* @param state |
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/specimen/abcd206/in/AbcdParseUtility.java | ||
---|---|---|
9 | 9 |
*/ |
10 | 10 |
package eu.etaxonomy.cdm.io.specimen.abcd206.in; |
11 | 11 |
|
12 |
import java.io.InputStream; |
|
12 | 13 |
import java.net.URI; |
13 | 14 |
import java.util.Date; |
14 | 15 |
|
16 |
import javax.xml.parsers.DocumentBuilder; |
|
17 |
import javax.xml.parsers.DocumentBuilderFactory; |
|
18 |
|
|
15 | 19 |
import org.apache.log4j.Logger; |
16 | 20 |
import org.joda.time.DateTime; |
21 |
import org.w3c.dom.Document; |
|
22 |
import org.w3c.dom.Element; |
|
17 | 23 |
import org.w3c.dom.Node; |
18 | 24 |
import org.w3c.dom.NodeList; |
19 | 25 |
|
... | ... | |
96 | 102 |
return date; |
97 | 103 |
} |
98 | 104 |
|
105 |
/** |
|
106 |
* Return the list of root nodes for an ABCD XML file |
|
107 |
* @param fileName: the file's location |
|
108 |
* @return the list of root nodes ("Unit") |
|
109 |
*/ |
|
110 |
public static NodeList getUnitsNodeList(Abcd206ImportState state) { |
|
111 |
InputStream inputStream = state.getConfig().getSource(); |
|
112 |
NodeList unitList = null; |
|
113 |
try { |
|
114 |
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); |
|
115 |
DocumentBuilder builder = factory.newDocumentBuilder(); |
|
116 |
|
|
117 |
Document document = builder.parse(inputStream); |
|
118 |
Element root = document.getDocumentElement(); |
|
119 |
unitList = root.getElementsByTagName("Unit"); |
|
120 |
if (unitList.getLength() == 0) { |
|
121 |
unitList = root.getElementsByTagName("abcd:Unit"); |
|
122 |
} |
|
123 |
if (unitList.getLength() == 0) { |
|
124 |
unitList = root.getElementsByTagName("abcd21:Unit"); |
|
125 |
} |
|
126 |
} catch (Exception e) { |
|
127 |
logger.warn(e); |
|
128 |
} |
|
129 |
return unitList; |
|
130 |
} |
|
131 |
|
|
132 |
/** |
|
133 |
* Return the prefix an ABCD XML file |
|
134 |
* @param fileName: the file's location |
|
135 |
* @return the prefix |
|
136 |
*/ |
|
137 |
public static String getPrefix(Abcd206ImportState state) { |
|
138 |
InputStream inputStream = state.getConfig().getSource(); |
|
139 |
NodeList unitList = null; |
|
140 |
try { |
|
141 |
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); |
|
142 |
DocumentBuilder builder = factory.newDocumentBuilder(); |
|
143 |
|
|
144 |
Document document = builder.parse(inputStream); |
|
145 |
Element root = document.getDocumentElement(); |
|
146 |
unitList = root.getElementsByTagName("Unit"); |
|
147 |
if (unitList.getLength()>0) { |
|
148 |
return null; |
|
149 |
} |
|
150 |
unitList = root.getElementsByTagName("abcd:Unit"); |
|
151 |
if (unitList.getLength()>0) { |
|
152 |
return "abcd:"; |
|
153 |
} |
|
154 |
unitList = root.getElementsByTagName("abcd21:Unit"); |
|
155 |
if (unitList.getLength() == 0) { |
|
156 |
return "abcd21:"; |
|
157 |
} |
|
158 |
} catch (Exception e) { |
|
159 |
logger.warn(e); |
|
160 |
} |
|
161 |
return null; |
|
162 |
} |
|
163 |
|
|
99 | 164 |
} |
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/specimen/abcd206/in/UnitAssociationParser.java | ||
---|---|---|
9 | 9 |
*/ |
10 | 10 |
package eu.etaxonomy.cdm.io.specimen.abcd206.in; |
11 | 11 |
|
12 |
import java.util.List; |
|
12 |
import java.io.IOException; |
|
13 |
import java.net.URI; |
|
13 | 14 |
|
15 |
import org.apache.http.client.ClientProtocolException; |
|
16 |
import org.apache.log4j.Logger; |
|
14 | 17 |
import org.w3c.dom.Element; |
15 | 18 |
import org.w3c.dom.NodeList; |
16 | 19 |
|
17 | 20 |
import eu.etaxonomy.cdm.api.application.ICdmApplicationConfiguration; |
18 |
import eu.etaxonomy.cdm.model.agent.AgentBase; |
|
19 |
import eu.etaxonomy.cdm.model.agent.Institution; |
|
20 |
import eu.etaxonomy.cdm.model.occurrence.Collection; |
|
21 |
import eu.etaxonomy.cdm.persistence.query.MatchMode; |
|
21 |
import eu.etaxonomy.cdm.ext.occurrence.OccurenceQuery; |
|
22 |
import eu.etaxonomy.cdm.ext.occurrence.bioCase.BioCaseQueryServiceWrapper; |
|
22 | 23 |
|
23 | 24 |
/** |
24 | 25 |
* @author pplitzner |
... | ... | |
27 | 28 |
*/ |
28 | 29 |
public class UnitAssociationParser { |
29 | 30 |
|
31 |
private static final Logger logger = Logger.getLogger(UnitAssociationParser.class); |
|
32 |
|
|
30 | 33 |
private final String prefix; |
31 | 34 |
|
32 | 35 |
private final Abcd206ImportReport report; |
... | ... | |
47 | 50 |
NodeList unitAssociationList = associations.getElementsByTagName(prefix+"UnitAssociation"); |
48 | 51 |
|
49 | 52 |
for(int i=0;i<unitAssociationList.getLength();i++){ |
50 |
// BioCaseQueryServiceWrapper serviceWrapper = new BioCaseQueryServiceWrapper(); |
|
51 |
} |
|
52 |
|
|
53 |
|
|
54 | 53 |
|
55 |
|
|
56 |
//FIXME: how to handle multiple unit associations? |
|
57 |
// maybe check AssociationType but this needs to be stable |
|
58 |
// for only the first unitAssociation will be used |
|
59 |
if(unitAssociationList.getLength()>0 && unitAssociationList.item(0) instanceof Element){ |
|
60 | 54 |
Element unitAssociation = (Element)unitAssociationList.item(0); |
61 |
String collectionName = AbcdParseUtility.parseFirstTextContent(unitAssociation.getElementsByTagName(prefix+"SourceName")); |
|
62 |
List<Collection> matchingCollections = cdmAppController.getCollectionService().findByTitle(Collection.class, collectionName, MatchMode.EXACT, null, null, null, null, null).getRecords(); |
|
63 |
Collection collection; |
|
64 |
if(matchingCollections.size()==1){ |
|
65 |
collection = matchingCollections.iterator().next(); |
|
66 |
} |
|
67 |
else{ |
|
68 |
collection = Collection.NewInstance(); |
|
69 |
collection.setName(collectionName); |
|
55 |
|
|
56 |
//unit id |
|
57 |
String unitId = AbcdParseUtility.parseFirstTextContent(unitAssociation.getElementsByTagName(prefix+"UnitID")); |
|
58 |
//data access point |
|
59 |
URI datasetAccessPoint = AbcdParseUtility.parseFirstUri(unitAssociation.getElementsByTagName(prefix+"DatasetAccessPoint")); |
|
60 |
if(datasetAccessPoint==null){ |
|
61 |
datasetAccessPoint = AbcdParseUtility.parseFirstUri(unitAssociation.getElementsByTagName(prefix+"Comment")); |
|
70 | 62 |
} |
71 |
String institutionName = AbcdParseUtility.parseFirstTextContent(unitAssociation.getElementsByTagName(prefix+"SourceInstitutionCode")); |
|
72 |
List<AgentBase> matchingInstitutions = cdmAppController.getAgentService().findByTitle(Institution.class, institutionName, MatchMode.EXACT, null, null, null, null, null).getRecords(); |
|
73 |
Institution institution; |
|
74 |
if(matchingInstitutions.size()==1){ |
|
75 |
institution = (Institution) matchingInstitutions.iterator().next(); |
|
63 |
String message = "Unable to load unit "+unitId+" from "+datasetAccessPoint; |
|
64 |
if(datasetAccessPoint!=null){ |
|
65 |
//association type |
|
66 |
NodeList associationTypeList = unitAssociation.getElementsByTagName(prefix+"AssociationType"); |
|
67 |
|
|
68 |
BioCaseQueryServiceWrapper serviceWrapper = new BioCaseQueryServiceWrapper(); |
|
69 |
OccurenceQuery query = new OccurenceQuery(unitId); |
|
70 |
try { |
|
71 |
serviceWrapper.query(query, datasetAccessPoint); |
|
72 |
} catch (ClientProtocolException e) { |
|
73 |
logger.error(message, e); |
|
74 |
} catch (IOException e) { |
|
75 |
logger.error(message, e); |
|
76 |
} |
|
76 | 77 |
} |
77 | 78 |
else{ |
78 |
institution = Institution.NewInstance(); |
|
79 |
institution.setName(institutionName); |
|
79 |
report.addInfoMessage(message); |
|
80 | 80 |
} |
81 |
} |
|
81 | 82 |
|
82 |
String unitId = AbcdParseUtility.parseFirstTextContent(unitAssociation.getElementsByTagName(prefix+"UnitID")); |
|
83 |
NodeList associationTypeList = unitAssociation.getElementsByTagName(prefix+"AssociationType"); |
|
84 | 83 |
|
85 |
} |
|
84 |
|
|
85 |
|
|
86 |
// //FIXME: how to handle multiple unit associations? |
|
87 |
// // maybe check AssociationType but this needs to be stable |
|
88 |
// // for only the first unitAssociation will be used |
|
89 |
// if(unitAssociationList.getLength()>0 && unitAssociationList.item(0) instanceof Element){ |
|
90 |
// String collectionName = AbcdParseUtility.parseFirstTextContent(unitAssociation.getElementsByTagName(prefix+"SourceName")); |
|
91 |
// List<Collection> matchingCollections = cdmAppController.getCollectionService().findByTitle(Collection.class, collectionName, MatchMode.EXACT, null, null, null, null, null).getRecords(); |
|
92 |
// Collection collection; |
|
93 |
// if(matchingCollections.size()==1){ |
|
94 |
// collection = matchingCollections.iterator().next(); |
|
95 |
// } |
|
96 |
// else{ |
|
97 |
// collection = Collection.NewInstance(); |
|
98 |
// collection.setName(collectionName); |
|
99 |
// } |
|
100 |
// String institutionName = AbcdParseUtility.parseFirstTextContent(unitAssociation.getElementsByTagName(prefix+"SourceInstitutionCode")); |
|
101 |
// List<AgentBase> matchingInstitutions = cdmAppController.getAgentService().findByTitle(Institution.class, institutionName, MatchMode.EXACT, null, null, null, null, null).getRecords(); |
|
102 |
// Institution institution; |
|
103 |
// if(matchingInstitutions.size()==1){ |
|
104 |
// institution = (Institution) matchingInstitutions.iterator().next(); |
|
105 |
// } |
|
106 |
// else{ |
|
107 |
// institution = Institution.NewInstance(); |
|
108 |
// institution.setName(institutionName); |
|
109 |
// } |
|
110 |
// } |
|
86 | 111 |
} |
87 | 112 |
} |
88 | 113 |
|
Also available in: Unified diff
Move unit parsing to utility class