ref #9918 distinguish default and parsedEntity deduplication in ImportDeduplicationHe...
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / redlist / gefaesspflanzen / excel / RedListGefaesspflanzenTaxonExcelImport.java
1 // $Id$
2 /**
3 * Copyright (C) 2017 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.io.redlist.gefaesspflanzen.excel;
11
12 import java.util.Map;
13 import java.util.UUID;
14
15 import org.apache.commons.lang3.StringUtils;
16 import org.apache.log4j.Logger;
17 import org.springframework.stereotype.Component;
18 import org.springframework.transaction.TransactionStatus;
19
20 import eu.etaxonomy.cdm.common.CdmUtils;
21 import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
22 import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
23 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
24 import eu.etaxonomy.cdm.model.common.Language;
25 import eu.etaxonomy.cdm.model.name.IBotanicalName;
26 import eu.etaxonomy.cdm.model.name.Rank;
27 import eu.etaxonomy.cdm.model.name.TaxonName;
28 import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
29 import eu.etaxonomy.cdm.model.reference.Reference;
30 import eu.etaxonomy.cdm.model.taxon.Classification;
31 import eu.etaxonomy.cdm.model.taxon.Synonym;
32 import eu.etaxonomy.cdm.model.taxon.Taxon;
33 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
34 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
35 import eu.etaxonomy.cdm.strategy.homotypicgroup.BasionymRelationCreator;
36 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
37
38 /**
39 * Import for German red list checklist for plantae.
40 * @author a.mueller
41 * @since 13.06.2019
42 */
43 @Component
44 public class RedListGefaesspflanzenTaxonExcelImport<CONFIG extends RedListGefaesspflanzenExcelImportConfigurator>
45 extends SimpleExcelTaxonImport<CONFIG> {
46
47 private static final long serialVersionUID = -884838817884874228L;
48 private static final Logger logger = Logger.getLogger(RedListGefaesspflanzenTaxonExcelImport.class);
49
50 private static final String ID_COL = "SORT_ID";
51 private static final String UUID_COL = "TAXON_UUID";
52 private static final String SYN_FLAG_COL = "SYN_FLAG";
53 private static final String VOLLNAME_COL = "VOLLNAME";
54 private static final String WISS_NAME_COL = "WISS_NAME";
55 private static final String AUTHOR_COL = "AUTOR";
56 private static final String RANK_COL = "RANG";
57 private static final String ZUSATZ_COL = "ZUSATZ";
58
59
60 private static UUID rootUuid = UUID.fromString("235ae474-227f-438a-b132-4508053fcb1c");
61 private static UUID plantaeUuid = UUID.fromString("31bd1b7c-245a-416d-b076-aa090c7469ce");
62
63 private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
64 private BasionymRelationCreator basionymCreator = new BasionymRelationCreator();
65
66
67 @Override
68 protected String getWorksheetName(CONFIG config) {
69 return "Florenliste";
70 }
71
72 private boolean isFirst = true;
73 private TransactionStatus tx = null;
74
75 @Override
76 protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
77 if (isFirst){
78 tx = this.startTransaction();
79 isFirst = false;
80 }
81
82 String line = state.getCurrentLine() + ": ";
83 Map<String, String> record = state.getOriginalRecord();
84
85 String noStr = getValue(record, ID_COL);
86
87 //species
88 TaxonBase<?> taxon = makeTaxon(state, line, record, noStr);
89
90 getTaxonService().save(taxon);
91 saveNameRelations(taxon.getName());
92 }
93
94
95 @Override
96 protected void secondPass(SimpleExcelTaxonImportState<CONFIG> state) {
97 if (tx != null){
98 this.commitTransaction(tx);
99 tx = null;
100 }
101 }
102
103
104
105
106 /**
107 * @param col
108 * @return
109 */
110 private String getNamespace(CONFIG config) {
111 return getWorksheetName(config)+"."+ ID_COL;
112 }
113
114
115
116 /**
117 * @param state
118 * @param line
119 * @param record
120 * @param noStr
121 * @return
122 */
123 private TaxonBase<?> makeTaxon(SimpleExcelTaxonImportState<CONFIG> state, String line, Map<String, String> record,
124 String noStr) {
125
126 // TaxonNode familyTaxon = getFamilyTaxon(record, state);
127 // if (familyTaxon == null){
128 // logger.warn(line + "Family not created: " + record.get(FAMILIA));
129 // }
130
131 String nameStr = getValue(record, WISS_NAME_COL);
132 String authorStr = getValue(record, AUTHOR_COL);
133 String synFlag = getValue(record, SYN_FLAG_COL);
134 String uuidTaxon = getValue(record, UUID_COL);
135 String vollName = getValue(record, VOLLNAME_COL);
136 String zusatz = getValue(record, ZUSATZ_COL);
137
138 String sensuStr;
139 if (StringUtils.isNotEmpty(zusatz) && zusatz.startsWith("s. ")){
140 sensuStr = zusatz.split(",")[0].trim();
141 }else {
142 sensuStr = null;
143 }
144 String nomStatusStr;
145 if (StringUtils.isNotEmpty(zusatz) && !zusatz.trim().equals(sensuStr)){
146 nomStatusStr = sensuStr == null? zusatz.trim():zusatz.split(",")[1].trim();
147 }else{
148 nomStatusStr = null;
149 }
150
151
152 nameStr = CdmUtils.concat(" ", nameStr, authorStr);
153 boolean isAuct = nameStr.endsWith("auct.");
154 nameStr = normalizeNameStr(nameStr);
155
156 Rank rank = Rank.SPECIES();
157 TaxonName name = (TaxonName)parser.parseFullName(nameStr, state.getConfig().getNomenclaturalCode(), rank);
158 name.addImportSource(noStr, getNamespace(state.getConfig()), getSourceCitation(state), null);
159 name = state.getDeduplicationHelper().getExistingName(name, true);
160 if (name.isProtectedTitleCache()){
161 logger.warn(line + "Name could not be parsed: " + nameStr);
162 }
163
164 state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(name);
165
166 TaxonBase<?> taxon;
167 if ("1".equals(synFlag) || isAuct){
168 taxon = Taxon.NewInstance(name, getSecReference(state));
169 }else if ("b".equals(synFlag)||"x".equals(synFlag)){
170 taxon = Synonym.NewInstance(name, getSecReference(state));
171 }else{
172 logger.warn("Unknown synFlag: " + synFlag);
173 return null;
174 }
175 taxon.setUuid(UUID.fromString(uuidTaxon));
176 if (isAuct){
177 taxon.setAppendedPhrase("auct."); //TODO
178 }
179 if (sensuStr != null){
180 taxon.setAppendedPhrase(sensuStr);
181 }
182
183 taxon.addImportSource(noStr, getNamespace(state.getConfig()), getSourceCitation(state), null);
184
185 checkVollname(state, taxon, vollName, sensuStr, isAuct);
186 return taxon;
187 }
188
189
190
191 /**
192 * @param state
193 * @param taxon
194 * @param sensuStr
195 * @param isAuct
196 * @param vollName
197 */
198 private void checkVollname(SimpleExcelTaxonImportState<CONFIG> state, TaxonBase<?> taxon, String vollName, String sensuStr, boolean isAuct) {
199 TaxonName name = taxon.getName();
200 String titleCache = (sensuStr == null && !isAuct) ? name.getTitleCache() : taxon.getTitleCache();
201 vollName = vollName.replace(" agg.", " aggr.").replace(" (E)", "");
202 if (!titleCache.equals(vollName)){
203 logger.warn("Vollname weicht ab: " + vollName +" <-> " + titleCache);
204 }
205 }
206
207
208 /**
209 * @param nameStr
210 */
211 private String normalizeNameStr(String nameStr) {
212 String result = nameStr.replace(" agg.", " aggr.").replaceAll(" auct.$", "")
213 .replaceAll(" grex ", " subsp. ").replaceAll(" sublusus ", " subsp. ");
214 return result;
215 }
216
217 private TaxonNode rootNode;
218 private TaxonNode getClassification(SimpleExcelTaxonImportState<CONFIG> state) {
219 if (rootNode == null){
220 Reference sec = getSecReference(state);
221 String classificationName = state.getConfig().getClassificationName();
222 Language language = Language.DEFAULT();
223 Classification classification = Classification.NewInstance(classificationName, sec, language);
224 classification.setUuid(state.getConfig().getClassificationUuid());
225 classification.getRootNode().setUuid(rootUuid);
226
227 IBotanicalName plantaeName = TaxonNameFactory.NewBotanicalInstance(Rank.KINGDOM());
228 plantaeName.setGenusOrUninomial("Plantae");
229 Taxon plantae = Taxon.NewInstance(plantaeName, sec);
230 TaxonNode plantaeNode = classification.addChildTaxon(plantae, null, null);
231 plantaeNode.setUuid(plantaeUuid);
232 getClassificationService().save(classification);
233
234 rootNode = plantaeNode;
235 }
236 return rootNode;
237 }
238
239
240 // protected IBotanicalName makeFamilyName(SimpleExcelTaxonImportState<CONFIG> state, String famStr) {
241 // IBotanicalName name = TaxonNameFactory.NewBotanicalInstance(Rank.FAMILY());
242 // famStr = decapitalize(famStr);
243 // name.setGenusOrUninomial(famStr);
244 // name.addSource(makeOriginalSource(state));
245 // return name;
246 // }
247
248 /**
249 * @param state
250 * @return
251 */
252 @Override
253 protected IdentifiableSource makeOriginalSource(SimpleExcelTaxonImportState<CONFIG> state) {
254 return IdentifiableSource.NewDataImportInstance(getValue(state.getOriginalRecord(),ID_COL),
255 getNamespace(state.getConfig()), state.getConfig().getSourceReference());
256 }
257
258
259 protected Reference getSecReference(SimpleExcelTaxonImportState<CONFIG> state) {
260 return state.getConfig().getSecReference();
261 }
262
263 /**
264 * @param state
265 * @return
266 */
267 protected Reference getSourceCitation(SimpleExcelTaxonImportState<CONFIG> state) {
268 return state.getConfig().getSourceReference();
269 }
270
271 /**
272 * @param state
273 * @param parentStr
274 * @return
275 */
276 private TaxonNode getParent(SimpleExcelTaxonImportState<CONFIG> state, String parentStr) {
277 Taxon taxon = state.getHigherTaxon(parentStr);
278
279 return taxon == null ? null : taxon.getTaxonNodes().iterator().next();
280 }
281
282 }