Project

General

Profile

Download (10.4 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy 
4
* http://www.e-taxonomy.eu
5
* 
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.tcsrdf;
10

    
11
import java.util.ArrayList;
12
import java.util.Iterator;
13
import java.util.List;
14
import java.util.Set;
15

    
16
import org.apache.log4j.Logger;
17
import org.jdom.Attribute;
18
import org.jdom.Element;
19
import org.jdom.Namespace;
20
import org.jdom.filter.ElementFilter;
21
import org.jdom.filter.Filter;
22
import org.springframework.stereotype.Component;
23

    
24
import eu.etaxonomy.cdm.api.service.ITaxonService;
25
import eu.etaxonomy.cdm.common.XmlHelp;
26
import eu.etaxonomy.cdm.io.common.ICdmIO;
27
import eu.etaxonomy.cdm.io.common.ImportHelper;
28
import eu.etaxonomy.cdm.io.common.MapWrapper;
29
import eu.etaxonomy.cdm.model.common.DescriptionElementSource;
30
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
31
import eu.etaxonomy.cdm.model.description.Distribution;
32
import eu.etaxonomy.cdm.model.description.Feature;
33
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTermBase;
34
import eu.etaxonomy.cdm.model.description.PresenceTerm;
35
import eu.etaxonomy.cdm.model.description.TaxonDescription;
36
import eu.etaxonomy.cdm.model.location.NamedArea;
37
import eu.etaxonomy.cdm.model.location.TdwgArea;
38
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
39
import eu.etaxonomy.cdm.model.reference.Reference;
40
import eu.etaxonomy.cdm.model.taxon.Synonym;
41
import eu.etaxonomy.cdm.model.taxon.Taxon;
42
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
43

    
44
/**
45
 * @author a.mueller
46
 * @created 29.05.2008
47
 * @version 1.0
48
 */
49
@Component
50
public class TcsRdfTaxonImport  extends TcsRdfImportBase implements ICdmIO<TcsRdfImportState> {
51
	private static final Logger logger = Logger.getLogger(TcsRdfTaxonImport.class);
52

    
53
	private static int modCount = 30000;
54
	
55
	public TcsRdfTaxonImport(){
56
		super();
57
	}
58
	
59
	
60
	@Override
61
	public boolean doCheck(TcsRdfImportState state){
62
		boolean result = true;
63
		logger.warn("Checking for Taxa not yet implemented");
64
		//result &= checkArticlesWithoutJournal(bmiConfig);
65
		//result &= checkPartOfJournal(bmiConfig);
66
		
67
		return result;
68
	}
69
	
70
	protected static CdmSingleAttributeXmlMapperBase[] standardMappers = new CdmSingleAttributeXmlMapperBase[]{
71
//		new CdmTextElementMapper("genusPart", "genusOrUninomial")
72
	
73
	};
74

    
75
	
76
	protected static CdmSingleAttributeXmlMapperBase[] operationalMappers = new CdmSingleAttributeXmlMapperBase[]{
77
		 new CdmUnclearMapper("hasName")
78
		,new CdmUnclearMapper("hasName")
79
		, new CdmUnclearMapper("accordingTo")
80
		, new CdmUnclearMapper("hasRelationship")
81
		, new CdmUnclearMapper("code", nsTgeo)	
82
	};
83
	
84
	protected static CdmSingleAttributeXmlMapperBase[] unclearMappers = new CdmSingleAttributeXmlMapperBase[]{
85
		new CdmUnclearMapper("primary")
86
		, new CdmUnclearMapper("note", nsTcom)	
87
		, new CdmUnclearMapper("taxonStatus", nsTpalm)
88
		
89
		, new CdmUnclearMapper("TaxonName", nsTn)	
90
		, new CdmUnclearMapper("dateOfEntry", nsTpalm)	
91
	};
92
	
93
	
94
	
95
	@Override
96
	protected void doInvoke(TcsRdfImportState state){
97
		
98
		MapWrapper<TaxonBase> taxonMap = (MapWrapper<TaxonBase>)state.getStore(ICdmIO.TAXON_STORE);
99
		MapWrapper<TaxonNameBase> taxonNameMap = (MapWrapper<TaxonNameBase>)state.getStore(ICdmIO.TAXONNAME_STORE);
100
		MapWrapper<Reference> referenceMap = (MapWrapper<Reference>)state.getStore(ICdmIO.REFERENCE_STORE);
101
		MapWrapper<Reference> nomRefMap = (MapWrapper<Reference>)state.getStore(ICdmIO.NOMREF_STORE);
102
		
103
		String xmlElementName;
104
		String xmlAttributeName;
105
		Namespace elementNamespace;
106
		Namespace attributeNamespace;
107
		
108
		logger.info("start makeTaxa ...");
109
		
110
		TcsRdfImportConfigurator config = state.getConfig();
111
		Element root = config.getSourceRoot();
112
		
113
		Namespace rdfNamespace = config.getRdfNamespace();
114
		
115
		String idNamespace = "TaxonConcept";
116
		xmlElementName = "TaxonConcept";
117
		elementNamespace = config.getTcNamespace();
118
		List<Element> elTaxonConcepts = root.getChildren(xmlElementName, elementNamespace);
119

    
120
		ITaxonService taxonService = getTaxonService();
121
		
122
		//debug names
123
		if (false){
124
			for (Object nameResource : taxonNameMap.keySet()){
125
				System.out.println(nameResource);
126
			}
127
		}
128
		
129
		int i = 0;
130
		//for each taxonConcept
131
		for (Element elTaxonConcept : elTaxonConcepts){
132
			if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
133
			
134
			//
135
			String taxonAbout = elTaxonConcept.getAttributeValue("about", rdfNamespace);
136
			
137
			//hasName
138
			xmlElementName = "hasName";
139
			elementNamespace = config.getTcNamespace();
140
			xmlAttributeName = "resource";
141
			attributeNamespace = rdfNamespace;
142
			String strNameResource= XmlHelp.getChildAttributeValue(elTaxonConcept, xmlElementName, elementNamespace, xmlAttributeName, attributeNamespace);
143
			TaxonNameBase taxonNameBase = taxonNameMap.get(strNameResource);
144
			if (taxonNameBase == null){
145
				logger.warn("Taxon has no name: " + taxonAbout + "; Resource: " + strNameResource);
146
			}
147
			
148
			
149
			//accordingTo
150
			xmlElementName = "accordingTo";
151
			elementNamespace = config.getTcNamespace();
152
			xmlAttributeName = "resource";
153
			attributeNamespace = rdfNamespace;
154
			//String strAccordingTo = elTaxonConcept.getChildTextTrim(xmlElementName, elementNamespace);
155
			String strAccordingTo = XmlHelp.getChildAttributeValue(elTaxonConcept, xmlElementName, elementNamespace, xmlAttributeName, attributeNamespace);
156
			
157
			
158
//			//FIXME
159
//			String secId = "pub_999999";
160
			Reference sec = referenceMap.get(strAccordingTo);
161
			if (sec == null){
162
				sec = nomRefMap.get(strAccordingTo);
163
			}
164
			if (sec == null){
165
				logger.warn("sec could not be found in referenceMap or nomRefMap for secId: " + strAccordingTo);
166
			}
167
			
168
			TaxonBase taxonBase;
169
			Namespace geoNamespace = config.getGeoNamespace();
170
			if (hasIsSynonymRelation(elTaxonConcept, rdfNamespace) || isSynonym(elTaxonConcept, config.getPalmNamespace())){
171
				//Synonym
172
				taxonBase = Synonym.NewInstance(taxonNameBase, sec);
173
				List<DescriptionElementBase> geo = makeGeo(elTaxonConcept, geoNamespace, rdfNamespace);
174
				if (geo.size() > 0){
175
					logger.warn("Synonym (" + taxonAbout + ") has geo description!");
176
				}
177
			}else{
178
				//Taxon
179
				Taxon taxon = Taxon.NewInstance(taxonNameBase, sec);
180
				List<DescriptionElementBase> geoList = makeGeo(elTaxonConcept, geoNamespace, rdfNamespace);
181
				TaxonDescription description = TaxonDescription.NewInstance(taxon);
182
				description.addDescriptionSource(taxon.getSec());
183
				for (DescriptionElementBase geo: geoList){
184
					description.addElement(geo);
185
					DescriptionElementSource source = DescriptionElementSource.NewInstance(null, null, taxon.getSec(), null);
186
					geo.addSource(source);
187
				}
188
				taxon.addDescription(description);
189
				taxonBase = taxon;
190
			}
191
			
192
			Set<String> omitAttributes = null;
193
			makeStandardMapper(elTaxonConcept, taxonBase, omitAttributes, standardMappers);
194

    
195
			ImportHelper.setOriginalSource(taxonBase, config.getSourceReference(), taxonAbout, idNamespace);
196
			checkAdditionalContents(elTaxonConcept, standardMappers, operationalMappers, unclearMappers);
197
			
198
			taxonMap.put(taxonAbout, taxonBase);
199
			
200
		}
201
		//invokeRelations(source, cdmApp, deleteAll, taxonMap, referenceMap);
202
		logger.info("saving " + taxonMap.size()+ " taxa ...");
203
		taxonService.save(taxonMap.objects());
204
		logger.info("end makeTaxa ...");
205
		return;
206
	}
207
	
208
	
209
	/**
210
	 * @param rdfNamespace 
211
	 * @param elTaxonConcept 
212
	 * @return
213
	 */
214
	private boolean isSynonym(Element elTaxonConcept, Namespace tpalmNamespace) {
215
		if (elTaxonConcept == null || ! "TaxonConcept".equalsIgnoreCase(elTaxonConcept.getName()) ){
216
			return false;
217
		}
218
		Element status = elTaxonConcept.getChild("taxonStatus", tpalmNamespace);
219
		if (status == null){
220
			return false;
221
		}else{
222
			String statusText = status.getTextNormalize();
223
			if ("S".equalsIgnoreCase(statusText)){
224
				return true;
225
			}else if ("A".equalsIgnoreCase(statusText)){
226
				return false;
227
			}else if ("C".equalsIgnoreCase(statusText)){
228
				return false;
229
			}else if ("V".equalsIgnoreCase(statusText)){
230
				return false;
231
			}else if ("O".equalsIgnoreCase(statusText)){
232
				return false;
233
			}else if ("U".equalsIgnoreCase(statusText)){
234
				return false;
235
			}else{
236
				logger.warn("Unknown taxon status: " +  statusText);
237
				return false;
238
			}
239
		}
240
	}
241

    
242

    
243
	private boolean hasIsSynonymRelation(Element elTaxonConcept, Namespace rdfNamespace){
244
		boolean result = false;
245
		if (elTaxonConcept == null || ! "TaxonConcept".equalsIgnoreCase(elTaxonConcept.getName()) ){
246
			return false;
247
		}
248
		
249
		String elName = "relationshipCategory";
250
		Filter filter = new ElementFilter(elName, elTaxonConcept.getNamespace());
251
		Iterator<Element> relationshipCategories = elTaxonConcept.getDescendants(filter);
252
		while (relationshipCategories.hasNext()){
253
			Element relationshipCategory = relationshipCategories.next();
254
			Attribute resource = relationshipCategory.getAttribute("resource", rdfNamespace);
255
			String isSynonymFor = "http://rs.tdwg.org/ontology/voc/TaxonConcept#IsSynonymFor";
256
			if (resource != null && isSynonymFor.equalsIgnoreCase(resource.getValue()) ){
257
				return true;
258
			}
259
		}
260
		return result;
261
	}
262
	
263
	private List<DescriptionElementBase> makeGeo(Element elConcept, Namespace geoNamespace, Namespace rdfNamespace){
264
		List<DescriptionElementBase> result = new ArrayList<DescriptionElementBase>();
265
		String xmlElementName = "code";
266
		List<Element> elGeos = elConcept.getChildren(xmlElementName, geoNamespace);
267

    
268
		int i = 0;
269
		//for each geoTag
270
		for (Element elGeo : elGeos){
271
			//if ((i++ % modCount) == 0){ logger.info("Geocodes handled: " + (i-1));}
272
			
273
			String strGeoRegion = elGeo.getAttributeValue("resource", rdfNamespace);
274
			strGeoRegion = strGeoRegion.replace("http://rs.tdwg.org/ontology/voc/GeographicRegion#", "");
275
			NamedArea namedArea = TdwgArea.getAreaByTdwgAbbreviation(strGeoRegion);
276
			PresenceAbsenceTermBase status = PresenceTerm.PRESENT();
277
			DescriptionElementBase distribution = Distribution.NewInstance(namedArea, status);
278
			distribution.setFeature(Feature.DISTRIBUTION());
279
			//System.out.println(namedArea);
280
			
281
			result.add(distribution);
282
		}
283
		return result;
284
	}
285
	
286
	/* (non-Javadoc)
287
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
288
	 */
289
	protected boolean isIgnore(TcsRdfImportState state){
290
		return ! state.getConfig().isDoTaxa();
291
	}
292

    
293

    
294
}
(9-9/13)