Project

General

Profile

Download (7.7 KB) Statistics
| Branch: | Tag: | Revision:
// $Id$
/**
* Copyright (C) 2007 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
10

    
11
package eu.etaxonomy.cdm.app.wp6.diptera;
12

    
13
import java.util.HashMap;
14
import java.util.HashSet;
15
import java.util.List;
16
import java.util.Map;
17
import java.util.Set;
18

    
19
import org.apache.log4j.Logger;
20
import org.springframework.transaction.TransactionStatus;
21

    
22
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
23
import eu.etaxonomy.cdm.api.service.INameService;
24
import eu.etaxonomy.cdm.api.service.pager.Pager;
25
import eu.etaxonomy.cdm.app.common.CdmDestinations;
26
import eu.etaxonomy.cdm.database.DbSchemaValidation;
27
import eu.etaxonomy.cdm.database.ICdmDataSource;
28
import eu.etaxonomy.cdm.model.common.CdmBase;
29
import eu.etaxonomy.cdm.model.common.DescriptionElementSource;
30
import eu.etaxonomy.cdm.model.common.Language;
31
import eu.etaxonomy.cdm.model.description.DescriptionBase;
32
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
33
import eu.etaxonomy.cdm.model.description.Feature;
34
import eu.etaxonomy.cdm.model.description.TaxonDescription;
35
import eu.etaxonomy.cdm.model.description.TextData;
36
import eu.etaxonomy.cdm.model.name.NonViralName;
37
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
38
import eu.etaxonomy.cdm.model.taxon.Taxon;
39
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
40

    
41
/**
42
 * @author a.mueller
43
 * @created 01.10.2009
44
 * @version 1.0
45
 */
46
public class DipteraPostImportUpdater {
47
	private static final Logger logger = Logger.getLogger(DipteraPostImportUpdater.class);
48

    
49
	static final ICdmDataSource cdmDestination = CdmDestinations.localH2Palmae();
50
	
51
	/**
52
	 * This method updateds the citation text by deleting <code>OriginalName</code> tags and 
53
	 * adding the original name to the source either as a link to an existing taxon name
54
	 * or as a string. The later becomes true if there is not exactly one matching name
55
	 * @param dataSource
56
	 * @return
57
	 */
58
	public boolean updateCitations(ICdmDataSource dataSource) {
59
		try{
60
			logger.warn("start updating citations");
61
			boolean result = true;
62
			CdmApplicationController cdmApp = CdmApplicationController.NewInstance(dataSource, DbSchemaValidation.VALIDATE);
63
			Set<DescriptionElementBase> citationsToSave = new HashSet<DescriptionElementBase>();
64
			TransactionStatus tx = cdmApp.startTransaction();
65

    
66
			logger.warn("start updating citations ... application context started");
67
			int modCount = 100;
68
			int page = 0;
69
			int count = cdmApp.getTaxonService().count(Taxon.class);
70
			List<TaxonBase> taxonList = cdmApp.getTaxonService().list(Taxon.class, 100000, page, null, null);
71
			List<TaxonNameBase> nameList = cdmApp.getNameService().list(null, 100000, page, null, null);
72
			Map<String, TaxonNameBase> nameMap = new HashMap<String, TaxonNameBase>();
73
			Map<String, TaxonNameBase> nameDuplicateMap = new HashMap<String, TaxonNameBase>();
74
			fillNameMaps(nameList, nameMap, nameDuplicateMap);
75
			
76
			int i = 0;
77
			
78
			Taxon taxon;
79
			for (TaxonBase taxonBase : taxonList){
80
				if ((i++ % modCount) == 0){ logger.warn("taxa handled: " + (i-1));}
81
				
82
				if (taxonBase.isInstanceOf(Taxon.class)){
83
					taxon = CdmBase.deproxy(taxonBase, Taxon.class);
84
					Set<TextData> citations = getCitations(taxon);
85
					for (TextData citation : citations){
86
						Language language = Language.DEFAULT();
87
						String text = citation.getText(language);
88
						String originalNameString = parseOriginalNameString(text);
89
						String newText = parseNewText(text);
90
						citation.removeText(language);
91
						citation.putText(newText, language);
92
						TaxonNameBase scientificName = getScientificName(originalNameString, nameMap, nameDuplicateMap);
93
						
94
						Set<DescriptionElementSource> sources = citation.getSources();
95
						if (sources.size() > 1){
96
							logger.warn("There are more then 1 sources for a description");
97
						}else if (sources.size() == 0){
98
							DescriptionElementSource source = DescriptionElementSource.NewInstance();
99
							citation.addSource(source);
100
							sources = citation.getSources();
101
						}
102
						for (DescriptionElementSource source : sources){
103
							if (scientificName != null){
104
								source.setNameUsedInSource(scientificName);
105
							}else{
106
								source.setOriginalNameString(originalNameString);
107
							}
108
						}
109
						
110
						citationsToSave.add(citation);
111
					}
112
				}
113
			}
114
				
115
			cdmApp.getDescriptionService().saveDescriptionElement(citationsToSave);
116
			//commit
117
			cdmApp.commitTransaction(tx);
118
			logger.warn("Citations updated!");
119
			return result;
120
		} catch (Exception e) {
121
			e.printStackTrace();
122
			logger.error("ERROR in citation update");
123
			return false;
124
		}
125
		
126
	}
127
	
128
	public boolean updateCollections(ICdmDataSource dataSource){
129
		DipteraCollectionImport collectionImport = new DipteraCollectionImport();
130
		return collectionImport.invoke(dataSource);
131
	}
132

    
133

    
134
	private void fillNameMaps(List<TaxonNameBase> nameList, Map<String, TaxonNameBase> nameMap, Map<String, TaxonNameBase> duplicateMap) {
135
		for (TaxonNameBase name : nameList){
136
			NonViralName nvn = name.deproxy(name, NonViralName.class);
137
			String nameCache = nvn.getNameCache();
138
			if (nameMap.containsKey(nameCache)){
139
				duplicateMap.put(nameCache, nvn);
140
			}else{
141
				nameMap.put(nameCache, nvn);
142
			}
143
		}
144
	}
145
	
146
	
147
	private TaxonNameBase getScientificName(String originalNameString, Map<String, TaxonNameBase> nameMap, Map<String, TaxonNameBase> nameDuplicateMap) {
148
		originalNameString = originalNameString.trim();
149
		TaxonNameBase result = nameMap.get(originalNameString);
150
		if (nameDuplicateMap.containsKey(originalNameString)){
151
			result = null;
152
		}
153
		return result;
154
	}
155

    
156
	private TaxonNameBase getScientificName(String originalNameString, INameService nameService) {
157
		Pager<TaxonNameBase> names = nameService.findByName(null, originalNameString, null, null, null, null, null, null);
158
		if (names.getCount() != 1){
159
			return null;
160
		}else{
161
			return names.getRecords().get(0);
162
		}
163
	}
164

    
165
	private String parseOriginalNameString(String text) {
166
		String originalName = "<OriginalName>";
167
		int start = text.indexOf(originalName);
168
		int end = text.indexOf("</OriginalName>");
169
		if (start >-1 ){
170
			text = text.substring(start + originalName.length(), end);
171
		}
172
		text = text.trim();
173
		return text;
174
	}
175

    
176
	private String parseNewText(String text) {
177
		int start = text.indexOf("</OriginalName>");
178
		text = text.substring(start + "</OriginalName>".length());
179
		text = text.trim();
180
		if (text.startsWith(":")){
181
			text = text.substring(1);
182
		}
183
		text = text.trim();
184
		return text;
185
	}
186

    
187
	private Set<TextData> getCitations(Taxon taxon) {
188
		Set<TextData> result = new HashSet<TextData>();
189
		Set<TaxonDescription> descriptions = taxon.getDescriptions();
190
		for (DescriptionBase description : descriptions){
191
			Set<DescriptionElementBase> elements = description.getElements();
192
			for (DescriptionElementBase element : elements){
193
				Feature feature = element.getFeature();
194
				if (feature.equals(Feature.CITATION())){
195
					if (! element.isInstanceOf(TextData.class)){
196
						logger.warn("Citation is not of class TextData but " + element.getClass().getSimpleName());
197
					}else{
198
						TextData textData = element.deproxy(element, TextData.class);
199
						result.add(textData);
200
					}
201
				}
202
			}
203
		}
204
		return result;
205
	}
206

    
207

    
208

    
209
	
210
	/**
211
	 * @param args
212
	 */
213
	public static void main(String[] args) {
214
		DipteraPostImportUpdater updater = new DipteraPostImportUpdater();
215
		try {
216
			updater.updateCitations(cdmDestination);
217
		} catch (Exception e) {
218
			e.printStackTrace();
219
			logger.error("ERROR in feature tree update");
220
		}
221
	}
222

    
223
}
(4-4/4)