Project

General

Profile

Download (7.4 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.app.wp6.diptera;
11

    
12
import java.util.HashMap;
13
import java.util.HashSet;
14
import java.util.List;
15
import java.util.Map;
16
import java.util.Set;
17

    
18
import org.apache.log4j.Logger;
19
import org.springframework.transaction.TransactionStatus;
20

    
21
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
22
import eu.etaxonomy.cdm.api.service.INameService;
23
import eu.etaxonomy.cdm.api.service.pager.Pager;
24
import eu.etaxonomy.cdm.app.common.CdmDestinations;
25
import eu.etaxonomy.cdm.database.DbSchemaValidation;
26
import eu.etaxonomy.cdm.database.ICdmDataSource;
27
import eu.etaxonomy.cdm.io.common.ImportResult;
28
import eu.etaxonomy.cdm.model.common.Language;
29
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
30
import eu.etaxonomy.cdm.model.description.DescriptionBase;
31
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
32
import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
33
import eu.etaxonomy.cdm.model.description.Feature;
34
import eu.etaxonomy.cdm.model.description.TaxonDescription;
35
import eu.etaxonomy.cdm.model.description.TextData;
36
import eu.etaxonomy.cdm.model.name.TaxonName;
37
import eu.etaxonomy.cdm.model.taxon.Taxon;
38

    
39
/**
40
 * @author a.mueller
41
 * @since 01.10.2009
42
 * @version 1.0
43
 */
44
public class DipteraPostImportUpdater {
45
	private static final Logger logger = Logger.getLogger(DipteraPostImportUpdater.class);
46

    
47
	static final ICdmDataSource cdmDestination = CdmDestinations.localH2Palmae();
48

    
49
	/**
50
	 * This method updateds the citation text by deleting <code>OriginalName</code> tags and
51
	 * adding the original name to the source either as a link to an existing taxon name
52
	 * or as a string. The later becomes true if there is not exactly one matching name
53
	 * @param dataSource
54
	 * @return
55
	 */
56
	public ImportResult updateCitations(ICdmDataSource dataSource) {
57

    
58
			logger.warn("start updating citations");
59
			ImportResult result = new ImportResult();
60
			try{
61
			CdmApplicationController cdmApp = CdmApplicationController.NewInstance(dataSource, DbSchemaValidation.VALIDATE);
62
			Set<DescriptionElementBase> citationsToSave = new HashSet<DescriptionElementBase>();
63
			TransactionStatus tx = cdmApp.startTransaction();
64

    
65
			logger.warn("start updating citations ... application context started");
66
			int modCount = 100;
67
			int page = 0;
68
			int count = cdmApp.getTaxonService().count(Taxon.class);
69
			List<Taxon> taxonList = cdmApp.getTaxonService().list(Taxon.class, 100000, page, null, null);
70
			List<TaxonName> nameList = cdmApp.getNameService().list(null, 100000, page, null, null);
71
			Map<String, TaxonName> nameMap = new HashMap<>();
72
			Map<String, TaxonName> nameDuplicateMap = new HashMap<>();
73
			fillNameMaps(nameList, nameMap, nameDuplicateMap);
74

    
75
			int i = 0;
76

    
77
			for (Taxon taxon : taxonList){
78
				if ((i++ % modCount) == 0){ logger.warn("taxa handled: " + (i-1));}
79

    
80
				Set<TextData> citations = getCitations(taxon);
81
				for (TextData citation : citations){
82
					Language language = Language.DEFAULT();
83
					String text = citation.getText(language);
84
					String originalNameString = parseOriginalNameString(text);
85
					String newText = parseNewText(text);
86
					citation.removeText(language);
87
					citation.putText(language, newText);
88
					TaxonName scientificName = getScientificName(originalNameString, nameMap, nameDuplicateMap);
89

    
90
					Set<DescriptionElementSource> sources = citation.getSources();
91
					if (sources.size() > 1){
92
						logger.warn("There are more then 1 sources for a description");
93
					}else if (sources.size() == 0){
94
						DescriptionElementSource source = DescriptionElementSource.NewInstance(OriginalSourceType.PrimaryTaxonomicSource);
95
						citation.addSource(source);
96
						sources = citation.getSources();
97
					}
98
					for (DescriptionElementSource source : sources){
99
						if (scientificName != null){
100
							source.setNameUsedInSource(scientificName);
101
						}else{
102
							source.setOriginalNameString(originalNameString);
103
						}
104
					}
105

    
106
					citationsToSave.add(citation);
107
				}
108
			}
109

    
110
			cdmApp.getDescriptionService().saveDescriptionElement(citationsToSave);
111
			//commit
112
			cdmApp.commitTransaction(tx);
113
			logger.warn("Citations updated!");
114
			return result;
115
		} catch (Exception e) {
116
			e.printStackTrace();
117
			result.addError("ERROR in citation update");
118
			result.addException(e);
119
			return result;
120
		}
121

    
122
	}
123

    
124
	public ImportResult updateCollections(ICdmDataSource dataSource){
125
		DipteraCollectionImport collectionImport = new DipteraCollectionImport();
126
		return collectionImport.invoke(dataSource);
127
	}
128

    
129

    
130
	private void fillNameMaps(List<TaxonName> nameList, Map<String, TaxonName> nameMap, Map<String, TaxonName> duplicateMap) {
131
		for (TaxonName name : nameList){
132
			String nameCache = name.getNameCache();
133
			if (nameMap.containsKey(nameCache)){
134
				duplicateMap.put(nameCache, name);
135
			}else{
136
				nameMap.put(nameCache, name);
137
			}
138
		}
139
	}
140

    
141

    
142
	private TaxonName getScientificName(String originalNameString, Map<String, TaxonName> nameMap, Map<String, TaxonName> nameDuplicateMap) {
143
		originalNameString = originalNameString.trim();
144
		TaxonName result = nameMap.get(originalNameString);
145
		if (nameDuplicateMap.containsKey(originalNameString)){
146
			result = null;
147
		}
148
		return result;
149
	}
150

    
151
	private TaxonName getScientificName(String originalNameString, INameService nameService) {
152
		Pager<TaxonName> names = nameService.findByName(null, originalNameString, null, null, null, null, null, null);
153
		if (names.getCount() != 1){
154
			return null;
155
		}else{
156
			return names.getRecords().get(0);
157
		}
158
	}
159

    
160
	private String parseOriginalNameString(String text) {
161
		String originalName = "<OriginalName>";
162
		int start = text.indexOf(originalName);
163
		int end = text.indexOf("</OriginalName>");
164
		if (start >-1 ){
165
			text = text.substring(start + originalName.length(), end);
166
		}
167
		text = text.trim();
168
		return text;
169
	}
170

    
171
	private String parseNewText(String text) {
172
		int start = text.indexOf("</OriginalName>");
173
		text = text.substring(start + "</OriginalName>".length());
174
		text = text.trim();
175
		if (text.startsWith(":")){
176
			text = text.substring(1);
177
		}
178
		text = text.trim();
179
		return text;
180
	}
181

    
182
	private Set<TextData> getCitations(Taxon taxon) {
183
		Set<TextData> result = new HashSet<TextData>();
184
		Set<TaxonDescription> descriptions = taxon.getDescriptions();
185
		for (DescriptionBase description : descriptions){
186
			Set<DescriptionElementBase> elements = description.getElements();
187
			for (DescriptionElementBase element : elements){
188
				Feature feature = element.getFeature();
189
				if (feature.equals(Feature.CITATION())){
190
					if (! element.isInstanceOf(TextData.class)){
191
						logger.warn("Citation is not of class TextData but " + element.getClass().getSimpleName());
192
					}else{
193
						TextData textData = element.deproxy(element, TextData.class);
194
						result.add(textData);
195
					}
196
				}
197
			}
198
		}
199
		return result;
200
	}
201

    
202

    
203

    
204

    
205
	/**
206
	 * @param args
207
	 */
208
	public static void main(String[] args) {
209
		DipteraPostImportUpdater updater = new DipteraPostImportUpdater();
210
		try {
211
			updater.updateCitations(cdmDestination);
212
		} catch (Exception e) {
213
			e.printStackTrace();
214
			logger.error("ERROR in feature tree update");
215
		}
216
	}
217

    
218
}
(4-4/4)