821b36f49cf3d12ab48dbde0fd4ba6b426b83bcb
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / tcsrdf / TcsRdfTaxonImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.io.tcsrdf;
10
11 import java.util.ArrayList;
12 import java.util.Iterator;
13 import java.util.List;
14 import java.util.Set;
15
16 import org.apache.log4j.Logger;
17 import org.jdom.Attribute;
18 import org.jdom.Element;
19 import org.jdom.Namespace;
20 import org.jdom.filter.ElementFilter;
21 import org.jdom.filter.Filter;
22 import org.springframework.stereotype.Component;
23
24 import eu.etaxonomy.cdm.api.service.ITaxonService;
25 import eu.etaxonomy.cdm.common.XmlHelp;
26 import eu.etaxonomy.cdm.io.common.ICdmIO;
27 import eu.etaxonomy.cdm.io.common.ImportHelper;
28 import eu.etaxonomy.cdm.io.common.MapWrapper;
29 import eu.etaxonomy.cdm.model.common.DescriptionElementSource;
30 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
31 import eu.etaxonomy.cdm.model.description.Distribution;
32 import eu.etaxonomy.cdm.model.description.Feature;
33 import eu.etaxonomy.cdm.model.description.PresenceAbsenceTermBase;
34 import eu.etaxonomy.cdm.model.description.PresenceTerm;
35 import eu.etaxonomy.cdm.model.description.TaxonDescription;
36 import eu.etaxonomy.cdm.model.location.NamedArea;
37 import eu.etaxonomy.cdm.model.location.TdwgArea;
38 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
39 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
40 import eu.etaxonomy.cdm.model.taxon.Synonym;
41 import eu.etaxonomy.cdm.model.taxon.Taxon;
42 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
43
44 /**
45 * @author a.mueller
46 * @created 29.05.2008
47 * @version 1.0
48 */
49 @Component
50 public class TcsRdfTaxonImport extends TcsRdfImportBase implements ICdmIO<TcsRdfImportState> {
51 private static final Logger logger = Logger.getLogger(TcsRdfTaxonImport.class);
52
53 private static int modCount = 30000;
54
55 public TcsRdfTaxonImport(){
56 super();
57 }
58
59
60 @Override
61 public boolean doCheck(TcsRdfImportState state){
62 boolean result = true;
63 logger.warn("Checking for Taxa not yet implemented");
64 //result &= checkArticlesWithoutJournal(bmiConfig);
65 //result &= checkPartOfJournal(bmiConfig);
66
67 return result;
68 }
69
70 protected static CdmSingleAttributeXmlMapperBase[] standardMappers = new CdmSingleAttributeXmlMapperBase[]{
71 // new CdmTextElementMapper("genusPart", "genusOrUninomial")
72
73 };
74
75
76 protected static CdmSingleAttributeXmlMapperBase[] operationalMappers = new CdmSingleAttributeXmlMapperBase[]{
77 new CdmUnclearMapper("hasName")
78 ,new CdmUnclearMapper("hasName")
79 , new CdmUnclearMapper("accordingTo")
80 , new CdmUnclearMapper("hasRelationship")
81 , new CdmUnclearMapper("code", nsTgeo)
82 };
83
84 protected static CdmSingleAttributeXmlMapperBase[] unclearMappers = new CdmSingleAttributeXmlMapperBase[]{
85 new CdmUnclearMapper("primary")
86 , new CdmUnclearMapper("note", nsTcom)
87 , new CdmUnclearMapper("taxonStatus", nsTpalm)
88
89 , new CdmUnclearMapper("TaxonName", nsTn)
90 , new CdmUnclearMapper("dateOfEntry", nsTpalm)
91 };
92
93
94
95 @Override
96 protected boolean doInvoke(TcsRdfImportState state){
97
98 MapWrapper<TaxonBase> taxonMap = (MapWrapper<TaxonBase>)state.getStore(ICdmIO.TAXON_STORE);
99 MapWrapper<TaxonNameBase> taxonNameMap = (MapWrapper<TaxonNameBase>)state.getStore(ICdmIO.TAXONNAME_STORE);
100 MapWrapper<ReferenceBase> referenceMap = (MapWrapper<ReferenceBase>)state.getStore(ICdmIO.REFERENCE_STORE);
101 MapWrapper<ReferenceBase> nomRefMap = (MapWrapper<ReferenceBase>)state.getStore(ICdmIO.NOMREF_STORE);
102
103 String xmlElementName;
104 String xmlAttributeName;
105 Namespace elementNamespace;
106 Namespace attributeNamespace;
107
108 logger.info("start makeTaxa ...");
109
110 TcsRdfImportConfigurator config = state.getConfig();
111 Element root = config.getSourceRoot();
112 boolean success =true;
113
114 Namespace rdfNamespace = config.getRdfNamespace();
115
116 String idNamespace = "TaxonConcept";
117 xmlElementName = "TaxonConcept";
118 elementNamespace = config.getTcNamespace();
119 List<Element> elTaxonConcepts = root.getChildren(xmlElementName, elementNamespace);
120
121 ITaxonService taxonService = getTaxonService();
122
123 //debug names
124 if (false){
125 for (Object nameResource : taxonNameMap.keySet()){
126 System.out.println(nameResource);
127 }
128 }
129
130 int i = 0;
131 //for each taxonConcept
132 for (Element elTaxonConcept : elTaxonConcepts){
133 if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
134
135 //
136 String taxonAbout = elTaxonConcept.getAttributeValue("about", rdfNamespace);
137
138 //hasName
139 xmlElementName = "hasName";
140 elementNamespace = config.getTcNamespace();
141 xmlAttributeName = "resource";
142 attributeNamespace = rdfNamespace;
143 String strNameResource= XmlHelp.getChildAttributeValue(elTaxonConcept, xmlElementName, elementNamespace, xmlAttributeName, attributeNamespace);
144 TaxonNameBase taxonNameBase = taxonNameMap.get(strNameResource);
145 if (taxonNameBase == null){
146 logger.warn("Taxon has no name: " + taxonAbout + "; Resource: " + strNameResource);
147 }
148
149
150 //accordingTo
151 xmlElementName = "accordingTo";
152 elementNamespace = config.getTcNamespace();
153 xmlAttributeName = "resource";
154 attributeNamespace = rdfNamespace;
155 //String strAccordingTo = elTaxonConcept.getChildTextTrim(xmlElementName, elementNamespace);
156 String strAccordingTo = XmlHelp.getChildAttributeValue(elTaxonConcept, xmlElementName, elementNamespace, xmlAttributeName, attributeNamespace);
157
158
159 // //FIXME
160 // String secId = "pub_999999";
161 ReferenceBase sec = referenceMap.get(strAccordingTo);
162 if (sec == null){
163 sec = nomRefMap.get(strAccordingTo);
164 }
165 if (sec == null){
166 logger.warn("sec could not be found in referenceMap or nomRefMap for secId: " + strAccordingTo);
167 }
168
169 TaxonBase taxonBase;
170 Namespace geoNamespace = config.getGeoNamespace();
171 if (hasIsSynonymRelation(elTaxonConcept, rdfNamespace) || isSynonym(elTaxonConcept, config.getPalmNamespace())){
172 //Synonym
173 taxonBase = Synonym.NewInstance(taxonNameBase, sec);
174 List<DescriptionElementBase> geo = makeGeo(elTaxonConcept, geoNamespace, rdfNamespace);
175 if (geo.size() > 0){
176 logger.warn("Synonym (" + taxonAbout + ") has geo description!");
177 }
178 }else{
179 //Taxon
180 Taxon taxon = Taxon.NewInstance(taxonNameBase, sec);
181 List<DescriptionElementBase> geoList = makeGeo(elTaxonConcept, geoNamespace, rdfNamespace);
182 TaxonDescription description = TaxonDescription.NewInstance(taxon);
183 description.addDescriptionSource(taxon.getSec());
184 for (DescriptionElementBase geo: geoList){
185 description.addElement(geo);
186 DescriptionElementSource source = DescriptionElementSource.NewInstance(null, null, taxon.getSec(), null);
187 geo.addSource(source);
188 }
189 taxon.addDescription(description);
190 taxonBase = taxon;
191 }
192
193 Set<String> omitAttributes = null;
194 makeStandardMapper(elTaxonConcept, taxonBase, omitAttributes, standardMappers);
195
196 ImportHelper.setOriginalSource(taxonBase, config.getSourceReference(), taxonAbout, idNamespace);
197 checkAdditionalContents(elTaxonConcept, standardMappers, operationalMappers, unclearMappers);
198
199 taxonMap.put(taxonAbout, taxonBase);
200
201 }
202 //invokeRelations(source, cdmApp, deleteAll, taxonMap, referenceMap);
203 logger.info("saving " + taxonMap.size()+ " taxa ...");
204 taxonService.save(taxonMap.objects());
205 logger.info("end makeTaxa ...");
206 return success;
207 }
208
209
210 /**
211 * @param rdfNamespace
212 * @param elTaxonConcept
213 * @return
214 */
215 private boolean isSynonym(Element elTaxonConcept, Namespace tpalmNamespace) {
216 if (elTaxonConcept == null || ! "TaxonConcept".equalsIgnoreCase(elTaxonConcept.getName()) ){
217 return false;
218 }
219 Element status = elTaxonConcept.getChild("taxonStatus", tpalmNamespace);
220 if (status == null){
221 return false;
222 }else{
223 String statusText = status.getTextNormalize();
224 if ("S".equalsIgnoreCase(statusText)){
225 return true;
226 }else if ("A".equalsIgnoreCase(statusText)){
227 return false;
228 }else if ("C".equalsIgnoreCase(statusText)){
229 return false;
230 }else if ("V".equalsIgnoreCase(statusText)){
231 return false;
232 }else if ("O".equalsIgnoreCase(statusText)){
233 return false;
234 }else if ("U".equalsIgnoreCase(statusText)){
235 return false;
236 }else{
237 logger.warn("Unknown taxon status: " + statusText);
238 return false;
239 }
240 }
241 }
242
243
244 private boolean hasIsSynonymRelation(Element elTaxonConcept, Namespace rdfNamespace){
245 boolean result = false;
246 if (elTaxonConcept == null || ! "TaxonConcept".equalsIgnoreCase(elTaxonConcept.getName()) ){
247 return false;
248 }
249
250 String elName = "relationshipCategory";
251 Filter filter = new ElementFilter(elName, elTaxonConcept.getNamespace());
252 Iterator<Element> relationshipCategories = elTaxonConcept.getDescendants(filter);
253 while (relationshipCategories.hasNext()){
254 Element relationshipCategory = relationshipCategories.next();
255 Attribute resource = relationshipCategory.getAttribute("resource", rdfNamespace);
256 String isSynonymFor = "http://rs.tdwg.org/ontology/voc/TaxonConcept#IsSynonymFor";
257 if (resource != null && isSynonymFor.equalsIgnoreCase(resource.getValue()) ){
258 return true;
259 }
260 }
261 return result;
262 }
263
264 private List<DescriptionElementBase> makeGeo(Element elConcept, Namespace geoNamespace, Namespace rdfNamespace){
265 List<DescriptionElementBase> result = new ArrayList<DescriptionElementBase>();
266 String xmlElementName = "code";
267 List<Element> elGeos = elConcept.getChildren(xmlElementName, geoNamespace);
268
269 int i = 0;
270 //for each geoTag
271 for (Element elGeo : elGeos){
272 //if ((i++ % modCount) == 0){ logger.info("Geocodes handled: " + (i-1));}
273
274 String strGeoRegion = elGeo.getAttributeValue("resource", rdfNamespace);
275 strGeoRegion = strGeoRegion.replace("http://rs.tdwg.org/ontology/voc/GeographicRegion#", "");
276 NamedArea namedArea = TdwgArea.getAreaByTdwgAbbreviation(strGeoRegion);
277 PresenceAbsenceTermBase status = PresenceTerm.PRESENT();
278 DescriptionElementBase distribution = Distribution.NewInstance(namedArea, status);
279 distribution.setFeature(Feature.DISTRIBUTION());
280 //System.out.println(namedArea);
281
282 result.add(distribution);
283 }
284 return result;
285 }
286
287 /* (non-Javadoc)
288 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
289 */
290 protected boolean isIgnore(TcsRdfImportState state){
291 return ! state.getConfig().isDoTaxa();
292 }
293
294
295 }