Project

General

Profile

Download (7.8 KB) Statistics
| Branch: | Revision:
1
// $Id$
2
/**
3
* Copyright (C) 2007 EDIT
4
* European Distributed Institute of Taxonomy 
5
* http://www.e-taxonomy.eu
6
* 
7
* The contents of this file are subject to the Mozilla Public License Version 1.1
8
* See LICENSE.TXT at the top of this package for the full license terms.
9
*/
10

    
11
package eu.etaxonomy.cdm.app.wp6.diptera;
12

    
13
import java.util.HashMap;
14
import java.util.HashSet;
15
import java.util.List;
16
import java.util.Map;
17
import java.util.Set;
18

    
19
import org.apache.log4j.Logger;
20
import org.springframework.transaction.TransactionStatus;
21

    
22
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
23
import eu.etaxonomy.cdm.api.service.INameService;
24
import eu.etaxonomy.cdm.api.service.pager.Pager;
25
import eu.etaxonomy.cdm.app.common.CdmDestinations;
26
import eu.etaxonomy.cdm.database.DbSchemaValidation;
27
import eu.etaxonomy.cdm.database.ICdmDataSource;
28
import eu.etaxonomy.cdm.model.common.CdmBase;
29
import eu.etaxonomy.cdm.model.common.Language;
30
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
31
import eu.etaxonomy.cdm.model.description.DescriptionBase;
32
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
33
import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
34
import eu.etaxonomy.cdm.model.description.Feature;
35
import eu.etaxonomy.cdm.model.description.TaxonDescription;
36
import eu.etaxonomy.cdm.model.description.TextData;
37
import eu.etaxonomy.cdm.model.name.NonViralName;
38
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
39
import eu.etaxonomy.cdm.model.taxon.Taxon;
40
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
41

    
42
/**
43
 * @author a.mueller
44
 * @created 01.10.2009
45
 * @version 1.0
46
 */
47
public class DipteraPostImportUpdater {
48
	private static final Logger logger = Logger.getLogger(DipteraPostImportUpdater.class);
49

    
50
	static final ICdmDataSource cdmDestination = CdmDestinations.localH2Palmae();
51
	
52
	/**
53
	 * This method updateds the citation text by deleting <code>OriginalName</code> tags and 
54
	 * adding the original name to the source either as a link to an existing taxon name
55
	 * or as a string. The later becomes true if there is not exactly one matching name
56
	 * @param dataSource
57
	 * @return
58
	 */
59
	public boolean updateCitations(ICdmDataSource dataSource) {
60
		try{
61
			logger.warn("start updating citations");
62
			boolean result = true;
63
			CdmApplicationController cdmApp = CdmApplicationController.NewInstance(dataSource, DbSchemaValidation.VALIDATE);
64
			Set<DescriptionElementBase> citationsToSave = new HashSet<DescriptionElementBase>();
65
			TransactionStatus tx = cdmApp.startTransaction();
66

    
67
			logger.warn("start updating citations ... application context started");
68
			int modCount = 100;
69
			int page = 0;
70
			int count = cdmApp.getTaxonService().count(Taxon.class);
71
			List<TaxonBase> taxonList = cdmApp.getTaxonService().list(Taxon.class, 100000, page, null, null);
72
			List<TaxonNameBase> nameList = cdmApp.getNameService().list(null, 100000, page, null, null);
73
			Map<String, TaxonNameBase> nameMap = new HashMap<String, TaxonNameBase>();
74
			Map<String, TaxonNameBase> nameDuplicateMap = new HashMap<String, TaxonNameBase>();
75
			fillNameMaps(nameList, nameMap, nameDuplicateMap);
76
			
77
			int i = 0;
78
			
79
			Taxon taxon;
80
			for (TaxonBase taxonBase : taxonList){
81
				if ((i++ % modCount) == 0){ logger.warn("taxa handled: " + (i-1));}
82
				
83
				if (taxonBase.isInstanceOf(Taxon.class)){
84
					taxon = CdmBase.deproxy(taxonBase, Taxon.class);
85
					Set<TextData> citations = getCitations(taxon);
86
					for (TextData citation : citations){
87
						Language language = Language.DEFAULT();
88
						String text = citation.getText(language);
89
						String originalNameString = parseOriginalNameString(text);
90
						String newText = parseNewText(text);
91
						citation.removeText(language);
92
						citation.putText(language, newText);
93
						TaxonNameBase<?,?> scientificName = getScientificName(originalNameString, nameMap, nameDuplicateMap);
94
						
95
						Set<DescriptionElementSource> sources = citation.getSources();
96
						if (sources.size() > 1){
97
							logger.warn("There are more then 1 sources for a description");
98
						}else if (sources.size() == 0){
99
							DescriptionElementSource source = DescriptionElementSource.NewInstance(OriginalSourceType.PrimaryTaxonomicSource);
100
							citation.addSource(source);
101
							sources = citation.getSources();
102
						}
103
						for (DescriptionElementSource source : sources){
104
							if (scientificName != null){
105
								source.setNameUsedInSource(scientificName);
106
							}else{
107
								source.setOriginalNameString(originalNameString);
108
							}
109
						}
110
						
111
						citationsToSave.add(citation);
112
					}
113
				}
114
			}
115
				
116
			cdmApp.getDescriptionService().saveDescriptionElement(citationsToSave);
117
			//commit
118
			cdmApp.commitTransaction(tx);
119
			logger.warn("Citations updated!");
120
			return result;
121
		} catch (Exception e) {
122
			e.printStackTrace();
123
			logger.error("ERROR in citation update");
124
			return false;
125
		}
126
		
127
	}
128
	
129
	public boolean updateCollections(ICdmDataSource dataSource){
130
		DipteraCollectionImport collectionImport = new DipteraCollectionImport();
131
		return collectionImport.invoke(dataSource);
132
	}
133

    
134

    
135
	private void fillNameMaps(List<TaxonNameBase> nameList, Map<String, TaxonNameBase> nameMap, Map<String, TaxonNameBase> duplicateMap) {
136
		for (TaxonNameBase name : nameList){
137
			NonViralName nvn = name.deproxy(name, NonViralName.class);
138
			String nameCache = nvn.getNameCache();
139
			if (nameMap.containsKey(nameCache)){
140
				duplicateMap.put(nameCache, nvn);
141
			}else{
142
				nameMap.put(nameCache, nvn);
143
			}
144
		}
145
	}
146
	
147
	
148
	private TaxonNameBase getScientificName(String originalNameString, Map<String, TaxonNameBase> nameMap, Map<String, TaxonNameBase> nameDuplicateMap) {
149
		originalNameString = originalNameString.trim();
150
		TaxonNameBase result = nameMap.get(originalNameString);
151
		if (nameDuplicateMap.containsKey(originalNameString)){
152
			result = null;
153
		}
154
		return result;
155
	}
156

    
157
	private TaxonNameBase getScientificName(String originalNameString, INameService nameService) {
158
		Pager<TaxonNameBase> names = nameService.findByName(null, originalNameString, null, null, null, null, null, null);
159
		if (names.getCount() != 1){
160
			return null;
161
		}else{
162
			return names.getRecords().get(0);
163
		}
164
	}
165

    
166
	private String parseOriginalNameString(String text) {
167
		String originalName = "<OriginalName>";
168
		int start = text.indexOf(originalName);
169
		int end = text.indexOf("</OriginalName>");
170
		if (start >-1 ){
171
			text = text.substring(start + originalName.length(), end);
172
		}
173
		text = text.trim();
174
		return text;
175
	}
176

    
177
	private String parseNewText(String text) {
178
		int start = text.indexOf("</OriginalName>");
179
		text = text.substring(start + "</OriginalName>".length());
180
		text = text.trim();
181
		if (text.startsWith(":")){
182
			text = text.substring(1);
183
		}
184
		text = text.trim();
185
		return text;
186
	}
187

    
188
	private Set<TextData> getCitations(Taxon taxon) {
189
		Set<TextData> result = new HashSet<TextData>();
190
		Set<TaxonDescription> descriptions = taxon.getDescriptions();
191
		for (DescriptionBase description : descriptions){
192
			Set<DescriptionElementBase> elements = description.getElements();
193
			for (DescriptionElementBase element : elements){
194
				Feature feature = element.getFeature();
195
				if (feature.equals(Feature.CITATION())){
196
					if (! element.isInstanceOf(TextData.class)){
197
						logger.warn("Citation is not of class TextData but " + element.getClass().getSimpleName());
198
					}else{
199
						TextData textData = element.deproxy(element, TextData.class);
200
						result.add(textData);
201
					}
202
				}
203
			}
204
		}
205
		return result;
206
	}
207

    
208

    
209

    
210
	
211
	/**
212
	 * @param args
213
	 */
214
	public static void main(String[] args) {
215
		DipteraPostImportUpdater updater = new DipteraPostImportUpdater();
216
		try {
217
			updater.updateCitations(cdmDestination);
218
		} catch (Exception e) {
219
			e.printStackTrace();
220
			logger.error("ERROR in feature tree update");
221
		}
222
	}
223

    
224
}
(4-4/4)