ref #8257 remove factory method for term node creation in app-import
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / app / wp6 / diptera / DipteraPostImportUpdater.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.app.wp6.diptera;
11
12 import java.util.HashMap;
13 import java.util.HashSet;
14 import java.util.List;
15 import java.util.Map;
16 import java.util.Set;
17
18 import org.apache.log4j.Logger;
19 import org.springframework.transaction.TransactionStatus;
20
21 import eu.etaxonomy.cdm.api.application.CdmApplicationController;
22 import eu.etaxonomy.cdm.api.service.INameService;
23 import eu.etaxonomy.cdm.api.service.pager.Pager;
24 import eu.etaxonomy.cdm.app.common.CdmDestinations;
25 import eu.etaxonomy.cdm.database.DbSchemaValidation;
26 import eu.etaxonomy.cdm.database.ICdmDataSource;
27 import eu.etaxonomy.cdm.io.common.ImportResult;
28 import eu.etaxonomy.cdm.model.common.Language;
29 import eu.etaxonomy.cdm.model.description.DescriptionBase;
30 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
31 import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
32 import eu.etaxonomy.cdm.model.description.Feature;
33 import eu.etaxonomy.cdm.model.description.TaxonDescription;
34 import eu.etaxonomy.cdm.model.description.TextData;
35 import eu.etaxonomy.cdm.model.name.TaxonName;
36 import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
37 import eu.etaxonomy.cdm.model.taxon.Taxon;
38
39 /**
40 * @author a.mueller
41 * @since 01.10.2009
42 * @version 1.0
43 */
44 public class DipteraPostImportUpdater {
45 private static final Logger logger = Logger.getLogger(DipteraPostImportUpdater.class);
46
47 static final ICdmDataSource cdmDestination = CdmDestinations.localH2Palmae();
48
49 /**
50 * This method updateds the citation text by deleting <code>OriginalName</code> tags and
51 * adding the original name to the source either as a link to an existing taxon name
52 * or as a string. The later becomes true if there is not exactly one matching name
53 * @param dataSource
54 * @return
55 */
56 public ImportResult updateCitations(ICdmDataSource dataSource) {
57
58 logger.warn("start updating citations");
59 ImportResult result = new ImportResult();
60 try{
61 CdmApplicationController cdmApp = CdmApplicationController.NewInstance(dataSource, DbSchemaValidation.VALIDATE);
62 Set<DescriptionElementBase> citationsToSave = new HashSet<DescriptionElementBase>();
63 TransactionStatus tx = cdmApp.startTransaction();
64
65 logger.warn("start updating citations ... application context started");
66 int modCount = 100;
67 int page = 0;
68 List<Taxon> taxonList = cdmApp.getTaxonService().list(Taxon.class, 100000, page, null, null);
69 List<TaxonName> nameList = cdmApp.getNameService().list(null, 100000, page, null, null);
70 Map<String, TaxonName> nameMap = new HashMap<>();
71 Map<String, TaxonName> nameDuplicateMap = new HashMap<>();
72 fillNameMaps(nameList, nameMap, nameDuplicateMap);
73
74 int i = 0;
75
76 for (Taxon taxon : taxonList){
77 if ((i++ % modCount) == 0){ logger.warn("taxa handled: " + (i-1));}
78
79 Set<TextData> citations = getCitations(taxon);
80 for (TextData citation : citations){
81 Language language = Language.DEFAULT();
82 String text = citation.getText(language);
83 String originalNameString = parseOriginalNameString(text);
84 String newText = parseNewText(text);
85 citation.removeText(language);
86 citation.putText(language, newText);
87 TaxonName scientificName = getScientificName(originalNameString, nameMap, nameDuplicateMap);
88
89 Set<DescriptionElementSource> sources = citation.getSources();
90 if (sources.size() > 1){
91 logger.warn("There are more then 1 sources for a description");
92 }else if (sources.size() == 0){
93 DescriptionElementSource source = DescriptionElementSource.NewInstance(OriginalSourceType.PrimaryTaxonomicSource);
94 citation.addSource(source);
95 sources = citation.getSources();
96 }
97 for (DescriptionElementSource source : sources){
98 if (scientificName != null){
99 source.setNameUsedInSource(scientificName);
100 }else{
101 source.setOriginalNameString(originalNameString);
102 }
103 }
104
105 citationsToSave.add(citation);
106 }
107 }
108
109 cdmApp.getDescriptionService().saveDescriptionElement(citationsToSave);
110 //commit
111 cdmApp.commitTransaction(tx);
112 logger.warn("Citations updated!");
113 return result;
114 } catch (Exception e) {
115 e.printStackTrace();
116 result.addError("ERROR in citation update");
117 result.addException(e);
118 return result;
119 }
120
121 }
122
123 public ImportResult updateCollections(ICdmDataSource dataSource){
124 DipteraCollectionImport collectionImport = new DipteraCollectionImport();
125 return collectionImport.invoke(dataSource);
126 }
127
128
129 private void fillNameMaps(List<TaxonName> nameList, Map<String, TaxonName> nameMap, Map<String, TaxonName> duplicateMap) {
130 for (TaxonName name : nameList){
131 String nameCache = name.getNameCache();
132 if (nameMap.containsKey(nameCache)){
133 duplicateMap.put(nameCache, name);
134 }else{
135 nameMap.put(nameCache, name);
136 }
137 }
138 }
139
140
141 private TaxonName getScientificName(String originalNameString, Map<String, TaxonName> nameMap, Map<String, TaxonName> nameDuplicateMap) {
142 originalNameString = originalNameString.trim();
143 TaxonName result = nameMap.get(originalNameString);
144 if (nameDuplicateMap.containsKey(originalNameString)){
145 result = null;
146 }
147 return result;
148 }
149
150 private TaxonName getScientificName(String originalNameString, INameService nameService) {
151 Pager<TaxonName> names = nameService.findByName(null, originalNameString, null, null, null, null, null, null);
152 if (names.getCount() != 1){
153 return null;
154 }else{
155 return names.getRecords().get(0);
156 }
157 }
158
159 private String parseOriginalNameString(String text) {
160 String originalName = "<OriginalName>";
161 int start = text.indexOf(originalName);
162 int end = text.indexOf("</OriginalName>");
163 if (start >-1 ){
164 text = text.substring(start + originalName.length(), end);
165 }
166 text = text.trim();
167 return text;
168 }
169
170 private String parseNewText(String text) {
171 int start = text.indexOf("</OriginalName>");
172 text = text.substring(start + "</OriginalName>".length());
173 text = text.trim();
174 if (text.startsWith(":")){
175 text = text.substring(1);
176 }
177 text = text.trim();
178 return text;
179 }
180
181 private Set<TextData> getCitations(Taxon taxon) {
182 Set<TextData> result = new HashSet<TextData>();
183 Set<TaxonDescription> descriptions = taxon.getDescriptions();
184 for (DescriptionBase description : descriptions){
185 Set<DescriptionElementBase> elements = description.getElements();
186 for (DescriptionElementBase element : elements){
187 Feature feature = element.getFeature();
188 if (feature.equals(Feature.CITATION())){
189 if (! element.isInstanceOf(TextData.class)){
190 logger.warn("Citation is not of class TextData but " + element.getClass().getSimpleName());
191 }else{
192 TextData textData = element.deproxy(element, TextData.class);
193 result.add(textData);
194 }
195 }
196 }
197 }
198 return result;
199 }
200
201
202
203
204 /**
205 * @param args
206 */
207 public static void main(String[] args) {
208 DipteraPostImportUpdater updater = new DipteraPostImportUpdater();
209 try {
210 updater.updateCitations(cdmDestination);
211 } catch (Exception e) {
212 e.printStackTrace();
213 logger.error("ERROR in feature tree update");
214 }
215 }
216
217 }