ref #694 adapt appimport to new DescriptionElementService
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / app / wp6 / diptera / DipteraPostImportUpdater.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.app.wp6.diptera;
10
11 import java.util.HashMap;
12 import java.util.HashSet;
13 import java.util.List;
14 import java.util.Map;
15 import java.util.Set;
16
17 import org.apache.log4j.Logger;
18 import org.springframework.transaction.TransactionStatus;
19
20 import eu.etaxonomy.cdm.api.application.CdmApplicationController;
21 import eu.etaxonomy.cdm.api.service.INameService;
22 import eu.etaxonomy.cdm.api.service.pager.Pager;
23 import eu.etaxonomy.cdm.app.common.CdmDestinations;
24 import eu.etaxonomy.cdm.database.DbSchemaValidation;
25 import eu.etaxonomy.cdm.database.ICdmDataSource;
26 import eu.etaxonomy.cdm.io.common.ImportResult;
27 import eu.etaxonomy.cdm.model.common.Language;
28 import eu.etaxonomy.cdm.model.description.DescriptionBase;
29 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
30 import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
31 import eu.etaxonomy.cdm.model.description.Feature;
32 import eu.etaxonomy.cdm.model.description.TaxonDescription;
33 import eu.etaxonomy.cdm.model.description.TextData;
34 import eu.etaxonomy.cdm.model.name.TaxonName;
35 import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
36 import eu.etaxonomy.cdm.model.taxon.Taxon;
37
38 /**
39 * @author a.mueller
40 * @since 01.10.2009
41 */
42 public class DipteraPostImportUpdater {
43 private static final Logger logger = Logger.getLogger(DipteraPostImportUpdater.class);
44
45 static final ICdmDataSource cdmDestination = CdmDestinations.localH2Palmae();
46
47 /**
48 * This method updateds the citation text by deleting <code>OriginalName</code> tags and
49 * adding the original name to the source either as a link to an existing taxon name
50 * or as a string. The later becomes true if there is not exactly one matching name
51 * @param dataSource
52 * @return
53 */
54 public ImportResult updateCitations(ICdmDataSource dataSource) {
55
56 logger.warn("start updating citations");
57 ImportResult result = new ImportResult();
58 try{
59 CdmApplicationController cdmApp = CdmApplicationController.NewInstance(dataSource, DbSchemaValidation.VALIDATE);
60 Set<DescriptionElementBase> citationsToSave = new HashSet<>();
61 TransactionStatus tx = cdmApp.startTransaction();
62
63 logger.warn("start updating citations ... application context started");
64 int modCount = 100;
65 int page = 0;
66 List<Taxon> taxonList = cdmApp.getTaxonService().list(Taxon.class, 100000, page, null, null);
67 List<TaxonName> nameList = cdmApp.getNameService().list(null, 100000, page, null, null);
68 Map<String, TaxonName> nameMap = new HashMap<>();
69 Map<String, TaxonName> nameDuplicateMap = new HashMap<>();
70 fillNameMaps(nameList, nameMap, nameDuplicateMap);
71
72 int i = 0;
73
74 for (Taxon taxon : taxonList){
75 if ((i++ % modCount) == 0){ logger.warn("taxa handled: " + (i-1));}
76
77 Set<TextData> citations = getCitations(taxon);
78 for (TextData citation : citations){
79 Language language = Language.DEFAULT();
80 String text = citation.getText(language);
81 String originalNameString = parseOriginalNameString(text);
82 String newText = parseNewText(text);
83 citation.removeText(language);
84 citation.putText(language, newText);
85 TaxonName scientificName = getScientificName(originalNameString, nameMap, nameDuplicateMap);
86
87 Set<DescriptionElementSource> sources = citation.getSources();
88 if (sources.size() > 1){
89 logger.warn("There are more then 1 sources for a description");
90 }else if (sources.size() == 0){
91 DescriptionElementSource source = DescriptionElementSource.NewInstance(OriginalSourceType.PrimaryTaxonomicSource);
92 citation.addSource(source);
93 sources = citation.getSources();
94 }
95 for (DescriptionElementSource source : sources){
96 if (scientificName != null){
97 source.setNameUsedInSource(scientificName);
98 }else{
99 source.setOriginalNameString(originalNameString);
100 }
101 }
102
103 citationsToSave.add(citation);
104 }
105 }
106
107 cdmApp.getDescriptionElementService().save(citationsToSave);
108 //commit
109 cdmApp.commitTransaction(tx);
110 logger.warn("Citations updated!");
111 return result;
112 } catch (Exception e) {
113 e.printStackTrace();
114 result.addError("ERROR in citation update");
115 result.addException(e);
116 return result;
117 }
118
119 }
120
121 public ImportResult updateCollections(ICdmDataSource dataSource){
122 DipteraCollectionImport collectionImport = new DipteraCollectionImport();
123 return collectionImport.invoke(dataSource);
124 }
125
126
127 private void fillNameMaps(List<TaxonName> nameList, Map<String, TaxonName> nameMap, Map<String, TaxonName> duplicateMap) {
128 for (TaxonName name : nameList){
129 String nameCache = name.getNameCache();
130 if (nameMap.containsKey(nameCache)){
131 duplicateMap.put(nameCache, name);
132 }else{
133 nameMap.put(nameCache, name);
134 }
135 }
136 }
137
138
139 private TaxonName getScientificName(String originalNameString, Map<String, TaxonName> nameMap, Map<String, TaxonName> nameDuplicateMap) {
140 originalNameString = originalNameString.trim();
141 TaxonName result = nameMap.get(originalNameString);
142 if (nameDuplicateMap.containsKey(originalNameString)){
143 result = null;
144 }
145 return result;
146 }
147
148 private TaxonName getScientificName(String originalNameString, INameService nameService) {
149 Pager<TaxonName> names = nameService.findByName(null, originalNameString, null, null, null, null, null, null);
150 if (names.getCount() != 1){
151 return null;
152 }else{
153 return names.getRecords().get(0);
154 }
155 }
156
157 private String parseOriginalNameString(String text) {
158 String originalName = "<OriginalName>";
159 int start = text.indexOf(originalName);
160 int end = text.indexOf("</OriginalName>");
161 if (start >-1 ){
162 text = text.substring(start + originalName.length(), end);
163 }
164 text = text.trim();
165 return text;
166 }
167
168 private String parseNewText(String text) {
169 int start = text.indexOf("</OriginalName>");
170 text = text.substring(start + "</OriginalName>".length());
171 text = text.trim();
172 if (text.startsWith(":")){
173 text = text.substring(1);
174 }
175 text = text.trim();
176 return text;
177 }
178
179 private Set<TextData> getCitations(Taxon taxon) {
180 Set<TextData> result = new HashSet<TextData>();
181 Set<TaxonDescription> descriptions = taxon.getDescriptions();
182 for (DescriptionBase description : descriptions){
183 Set<DescriptionElementBase> elements = description.getElements();
184 for (DescriptionElementBase element : elements){
185 Feature feature = element.getFeature();
186 if (feature.equals(Feature.CITATION())){
187 if (! element.isInstanceOf(TextData.class)){
188 logger.warn("Citation is not of class TextData but " + element.getClass().getSimpleName());
189 }else{
190 TextData textData = element.deproxy(element, TextData.class);
191 result.add(textData);
192 }
193 }
194 }
195 }
196 return result;
197 }
198
199
200
201
202 /**
203 * @param args
204 */
205 public static void main(String[] args) {
206 DipteraPostImportUpdater updater = new DipteraPostImportUpdater();
207 try {
208 updater.updateCitations(cdmDestination);
209 } catch (Exception e) {
210 e.printStackTrace();
211 logger.error("ERROR in feature tree update");
212 }
213 }
214
215 }