07fb3220d04de7b91c98becf66516838bc6b9a19
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / tcsrdf / TcsRdfTaxonImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.io.tcsrdf;
10
11 import java.util.ArrayList;
12 import java.util.Iterator;
13 import java.util.List;
14 import java.util.Set;
15
16 import org.apache.log4j.Logger;
17 import org.jdom.Attribute;
18 import org.jdom.Element;
19 import org.jdom.Namespace;
20 import org.jdom.filter.ElementFilter;
21 import org.jdom.filter.Filter;
22 import org.springframework.stereotype.Component;
23
24 import eu.etaxonomy.cdm.api.service.ITaxonService;
25 import eu.etaxonomy.cdm.common.XmlHelp;
26 import eu.etaxonomy.cdm.io.common.ICdmIO;
27 import eu.etaxonomy.cdm.io.common.ImportHelper;
28 import eu.etaxonomy.cdm.io.common.MapWrapper;
29 import eu.etaxonomy.cdm.model.common.DescriptionElementSource;
30 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
31 import eu.etaxonomy.cdm.model.description.Distribution;
32 import eu.etaxonomy.cdm.model.description.Feature;
33 import eu.etaxonomy.cdm.model.description.PresenceAbsenceTermBase;
34 import eu.etaxonomy.cdm.model.description.PresenceTerm;
35 import eu.etaxonomy.cdm.model.description.TaxonDescription;
36 import eu.etaxonomy.cdm.model.location.NamedArea;
37 import eu.etaxonomy.cdm.model.location.TdwgArea;
38 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
39 import eu.etaxonomy.cdm.model.reference.Reference;
40 import eu.etaxonomy.cdm.model.taxon.Synonym;
41 import eu.etaxonomy.cdm.model.taxon.Taxon;
42 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
43
44 /**
45 * @author a.mueller
46 * @created 29.05.2008
47 * @version 1.0
48 */
49 @Component
50 public class TcsRdfTaxonImport extends TcsRdfImportBase implements ICdmIO<TcsRdfImportState> {
51 private static final Logger logger = Logger.getLogger(TcsRdfTaxonImport.class);
52
53 private static int modCount = 30000;
54
55 public TcsRdfTaxonImport(){
56 super();
57 }
58
59
60 @Override
61 public boolean doCheck(TcsRdfImportState state){
62 boolean result = true;
63 logger.warn("Checking for Taxa not yet implemented");
64 //result &= checkArticlesWithoutJournal(bmiConfig);
65 //result &= checkPartOfJournal(bmiConfig);
66
67 return result;
68 }
69
70 protected static CdmSingleAttributeXmlMapperBase[] standardMappers = new CdmSingleAttributeXmlMapperBase[]{
71 // new CdmTextElementMapper("genusPart", "genusOrUninomial")
72
73 };
74
75
76 protected static CdmSingleAttributeXmlMapperBase[] operationalMappers = new CdmSingleAttributeXmlMapperBase[]{
77 new CdmUnclearMapper("hasName")
78 ,new CdmUnclearMapper("hasName")
79 , new CdmUnclearMapper("accordingTo")
80 , new CdmUnclearMapper("hasRelationship")
81 , new CdmUnclearMapper("code", nsTgeo)
82 };
83
84 protected static CdmSingleAttributeXmlMapperBase[] unclearMappers = new CdmSingleAttributeXmlMapperBase[]{
85 new CdmUnclearMapper("primary")
86 , new CdmUnclearMapper("note", nsTcom)
87 , new CdmUnclearMapper("taxonStatus", nsTpalm)
88
89 , new CdmUnclearMapper("TaxonName", nsTn)
90 , new CdmUnclearMapper("dateOfEntry", nsTpalm)
91 };
92
93
94
95 @Override
96 protected void doInvoke(TcsRdfImportState state){
97
98 MapWrapper<TaxonBase> taxonMap = (MapWrapper<TaxonBase>)state.getStore(ICdmIO.TAXON_STORE);
99 MapWrapper<TaxonNameBase> taxonNameMap = (MapWrapper<TaxonNameBase>)state.getStore(ICdmIO.TAXONNAME_STORE);
100 MapWrapper<Reference> referenceMap = (MapWrapper<Reference>)state.getStore(ICdmIO.REFERENCE_STORE);
101 MapWrapper<Reference> nomRefMap = (MapWrapper<Reference>)state.getStore(ICdmIO.NOMREF_STORE);
102
103 String xmlElementName;
104 String xmlAttributeName;
105 Namespace elementNamespace;
106 Namespace attributeNamespace;
107
108 logger.info("start makeTaxa ...");
109
110 TcsRdfImportConfigurator config = state.getConfig();
111 Element root = config.getSourceRoot();
112
113 Namespace rdfNamespace = config.getRdfNamespace();
114
115 String idNamespace = "TaxonConcept";
116 xmlElementName = "TaxonConcept";
117 elementNamespace = config.getTcNamespace();
118 List<Element> elTaxonConcepts = root.getChildren(xmlElementName, elementNamespace);
119
120 ITaxonService taxonService = getTaxonService();
121
122 //debug names
123 if (false){
124 for (Object nameResource : taxonNameMap.keySet()){
125 System.out.println(nameResource);
126 }
127 }
128
129 int i = 0;
130 //for each taxonConcept
131 for (Element elTaxonConcept : elTaxonConcepts){
132 if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
133
134 //
135 String taxonAbout = elTaxonConcept.getAttributeValue("about", rdfNamespace);
136
137 //hasName
138 xmlElementName = "hasName";
139 elementNamespace = config.getTcNamespace();
140 xmlAttributeName = "resource";
141 attributeNamespace = rdfNamespace;
142 String strNameResource= XmlHelp.getChildAttributeValue(elTaxonConcept, xmlElementName, elementNamespace, xmlAttributeName, attributeNamespace);
143 TaxonNameBase taxonNameBase = taxonNameMap.get(strNameResource);
144 if (taxonNameBase == null){
145 logger.warn("Taxon has no name: " + taxonAbout + "; Resource: " + strNameResource);
146 }
147
148
149 //accordingTo
150 xmlElementName = "accordingTo";
151 elementNamespace = config.getTcNamespace();
152 xmlAttributeName = "resource";
153 attributeNamespace = rdfNamespace;
154 //String strAccordingTo = elTaxonConcept.getChildTextTrim(xmlElementName, elementNamespace);
155 String strAccordingTo = XmlHelp.getChildAttributeValue(elTaxonConcept, xmlElementName, elementNamespace, xmlAttributeName, attributeNamespace);
156
157
158 // //FIXME
159 // String secId = "pub_999999";
160 Reference sec = referenceMap.get(strAccordingTo);
161 if (sec == null){
162 sec = nomRefMap.get(strAccordingTo);
163 }
164 if (sec == null){
165 logger.warn("sec could not be found in referenceMap or nomRefMap for secId: " + strAccordingTo);
166 }
167
168 TaxonBase taxonBase;
169 Namespace geoNamespace = config.getGeoNamespace();
170 if (hasIsSynonymRelation(elTaxonConcept, rdfNamespace) || isSynonym(elTaxonConcept, config.getPalmNamespace())){
171 //Synonym
172 taxonBase = Synonym.NewInstance(taxonNameBase, sec);
173 List<DescriptionElementBase> geo = makeGeo(elTaxonConcept, geoNamespace, rdfNamespace);
174 if (geo.size() > 0){
175 logger.warn("Synonym (" + taxonAbout + ") has geo description!");
176 }
177 }else{
178 //Taxon
179 Taxon taxon = Taxon.NewInstance(taxonNameBase, sec);
180 List<DescriptionElementBase> geoList = makeGeo(elTaxonConcept, geoNamespace, rdfNamespace);
181 TaxonDescription description = TaxonDescription.NewInstance(taxon);
182 description.addSource(null, null, taxon.getSec(), null);
183 for (DescriptionElementBase geo: geoList){
184 description.addElement(geo);
185 DescriptionElementSource source = DescriptionElementSource.NewInstance(null, null, taxon.getSec(), null);
186 geo.addSource(source);
187 }
188 taxon.addDescription(description);
189 taxonBase = taxon;
190 }
191
192 Set<String> omitAttributes = null;
193 makeStandardMapper(elTaxonConcept, taxonBase, omitAttributes, standardMappers);
194
195 ImportHelper.setOriginalSource(taxonBase, config.getSourceReference(), taxonAbout, idNamespace);
196 checkAdditionalContents(elTaxonConcept, standardMappers, operationalMappers, unclearMappers);
197
198 taxonMap.put(taxonAbout, taxonBase);
199
200 }
201 //invokeRelations(source, cdmApp, deleteAll, taxonMap, referenceMap);
202 logger.info("saving " + taxonMap.size()+ " taxa ...");
203 taxonService.save(taxonMap.objects());
204 logger.info("end makeTaxa ...");
205 return;
206 }
207
208
209 /**
210 * @param rdfNamespace
211 * @param elTaxonConcept
212 * @return
213 */
214 private boolean isSynonym(Element elTaxonConcept, Namespace tpalmNamespace) {
215 if (elTaxonConcept == null || ! "TaxonConcept".equalsIgnoreCase(elTaxonConcept.getName()) ){
216 return false;
217 }
218 Element status = elTaxonConcept.getChild("taxonStatus", tpalmNamespace);
219 if (status == null){
220 return false;
221 }else{
222 String statusText = status.getTextNormalize();
223 if ("S".equalsIgnoreCase(statusText)){
224 return true;
225 }else if ("A".equalsIgnoreCase(statusText)){
226 return false;
227 }else if ("C".equalsIgnoreCase(statusText)){
228 return false;
229 }else if ("V".equalsIgnoreCase(statusText)){
230 return false;
231 }else if ("O".equalsIgnoreCase(statusText)){
232 return false;
233 }else if ("U".equalsIgnoreCase(statusText)){
234 return false;
235 }else{
236 logger.warn("Unknown taxon status: " + statusText);
237 return false;
238 }
239 }
240 }
241
242
243 private boolean hasIsSynonymRelation(Element elTaxonConcept, Namespace rdfNamespace){
244 boolean result = false;
245 if (elTaxonConcept == null || ! "TaxonConcept".equalsIgnoreCase(elTaxonConcept.getName()) ){
246 return false;
247 }
248
249 String elName = "relationshipCategory";
250 Filter filter = new ElementFilter(elName, elTaxonConcept.getNamespace());
251 Iterator<Element> relationshipCategories = elTaxonConcept.getDescendants(filter);
252 while (relationshipCategories.hasNext()){
253 Element relationshipCategory = relationshipCategories.next();
254 Attribute resource = relationshipCategory.getAttribute("resource", rdfNamespace);
255 String isSynonymFor = "http://rs.tdwg.org/ontology/voc/TaxonConcept#IsSynonymFor";
256 if (resource != null && isSynonymFor.equalsIgnoreCase(resource.getValue()) ){
257 return true;
258 }
259 }
260 return result;
261 }
262
263 private List<DescriptionElementBase> makeGeo(Element elConcept, Namespace geoNamespace, Namespace rdfNamespace){
264 List<DescriptionElementBase> result = new ArrayList<DescriptionElementBase>();
265 String xmlElementName = "code";
266 List<Element> elGeos = elConcept.getChildren(xmlElementName, geoNamespace);
267
268 int i = 0;
269 //for each geoTag
270 for (Element elGeo : elGeos){
271 //if ((i++ % modCount) == 0){ logger.info("Geocodes handled: " + (i-1));}
272
273 String strGeoRegion = elGeo.getAttributeValue("resource", rdfNamespace);
274 strGeoRegion = strGeoRegion.replace("http://rs.tdwg.org/ontology/voc/GeographicRegion#", "");
275 NamedArea namedArea = TdwgArea.getAreaByTdwgAbbreviation(strGeoRegion);
276 PresenceAbsenceTermBase status = PresenceTerm.PRESENT();
277 DescriptionElementBase distribution = Distribution.NewInstance(namedArea, status);
278 distribution.setFeature(Feature.DISTRIBUTION());
279 //System.out.println(namedArea);
280
281 result.add(distribution);
282 }
283 return result;
284 }
285
286 /* (non-Javadoc)
287 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
288 */
289 protected boolean isIgnore(TcsRdfImportState state){
290 return ! state.getConfig().isDoTaxa();
291 }
292
293
294 }