Revision f261b330
Added by Andreas Kohlbecker over 7 years ago
app-import/src/main/java/eu/etaxonomy/cdm/io/iapt/IAPTExcelImport.java | ||
---|---|---|
17 | 17 |
import eu.etaxonomy.cdm.model.reference.Reference; |
18 | 18 |
import eu.etaxonomy.cdm.model.taxon.*; |
19 | 19 |
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl; |
20 |
import org.apache.commons.lang.ArrayUtils; |
|
20 | 21 |
import org.apache.commons.lang.StringEscapeUtils; |
21 | 22 |
import org.apache.commons.lang.StringUtils; |
22 | 23 |
import org.apache.log4j.Level; |
... | ... | |
66 | 67 |
private static final Pattern nomRefTokenizeP = Pattern.compile("^(.*):\\s([^\\.:]+)\\.(.*)$"); |
67 | 68 |
private static final Pattern nomRefPubYearExtractP = Pattern.compile("(.*?)(1[7,8,9][0-9]{2}).*$|^.*?[0-9]{1,2}([\\./])[0-1]?[0-9]\\3([0-9]{2})\\.$"); // 1700 - 1999 |
68 | 69 |
|
70 |
private MarkerType markerTypeFossil = null; |
|
71 |
|
|
69 | 72 |
private Taxon makeTaxon(HashMap<String, String> record, SimpleExcelTaxonImportState<CONFIG> state, |
70 |
TaxonNode higherTaxonNode, boolean isSynonym) { |
|
73 |
TaxonNode higherTaxonNode, boolean isSynonym, boolean isFossil) {
|
|
71 | 74 |
|
72 | 75 |
String line = state.getCurrentLine() + ": "; |
73 | 76 |
|
... | ... | |
75 | 78 |
String nameStr = getValue(record, NAMESTRING, true); |
76 | 79 |
String authorStr = getValue(record, AUTHORSTRING, true); |
77 | 80 |
String nomRefStr = getValue(record, LITSTRING, true); |
81 |
String authorsSpelling = getValue(record, AUTHORSSPELLING, true); |
|
78 | 82 |
|
79 | 83 |
String nomRefTitle = null; |
80 | 84 |
String nomRefDetail = null; |
... | ... | |
94 | 98 |
Matcher m2 = nomRefPubYearExtractP.matcher(nomRefPupDate); |
95 | 99 |
if(m2.matches()){ |
96 | 100 |
nomRefPupYear = m2.group(2); |
101 |
if(nomRefPupYear == null){ |
|
102 |
nomRefPupYear = m2.group(4); |
|
103 |
} |
|
104 |
if(nomRefPupYear == null){ |
|
105 |
logger.error("nomRefPupYear in " + nomRefStr + " is NULL" ); |
|
106 |
} |
|
97 | 107 |
if(nomRefPupYear.length() == 2 ){ |
98 | 108 |
// it is an abbreviated year from the 19** years |
99 | 109 |
nomRefPupYear = "19" + nomRefPupYear; |
... | ... | |
108 | 118 |
} |
109 | 119 |
} |
110 | 120 |
|
111 |
|
|
112 | 121 |
BotanicalName taxonName; |
122 |
// cache field for the taxonName.titleCache |
|
123 |
String taxonNameTitleCache = null; |
|
113 | 124 |
Map<String, AnnotationType> nameAnnotations = new HashMap<>(); |
114 | 125 |
|
115 |
if(titleCacheStr.endsWith(ANNOTATION_MARKER_STRING) && authorStr.endsWith(ANNOTATION_MARKER_STRING)){ |
|
126 |
// TitleCache preprocessing |
|
127 |
if(titleCacheStr.endsWith(ANNOTATION_MARKER_STRING) || authorStr.endsWith(ANNOTATION_MARKER_STRING)){ |
|
116 | 128 |
nameAnnotations.put("Author abbreviation not checked.", AnnotationType.EDITORIAL()); |
117 | 129 |
titleCacheStr = titleCacheStr.replace(ANNOTATION_MARKER_STRING, "").trim(); |
118 | 130 |
authorStr = authorStr.replace(ANNOTATION_MARKER_STRING, "").trim(); |
119 | 131 |
} |
120 | 132 |
|
133 |
// parse the full taxon name |
|
121 | 134 |
if(!StringUtils.isEmpty(nomRefTitle)){ |
122 | 135 |
String referenceSeparator = nomRefTitle.startsWith("in ") ? " " : ", "; |
123 | 136 |
String taxonFullNameStr = titleCacheStr + referenceSeparator + nomRefTitle; |
... | ... | |
127 | 140 |
taxonName = (BotanicalName) nameParser.parseFullName(titleCacheStr, NomenclaturalCode.ICNAFP, null); |
128 | 141 |
} |
129 | 142 |
|
143 |
taxonNameTitleCache = taxonName.getTitleCache().trim(); |
|
130 | 144 |
if (taxonName.isProtectedTitleCache()) { |
131 | 145 |
logger.warn(line + "Name could not be parsed: " + titleCacheStr); |
132 | 146 |
} else { |
133 | 147 |
|
134 | 148 |
boolean doRestoreTitleCacheStr = false; |
135 | 149 |
// Check titleCache |
136 |
String generatedTitleCache = taxonName.getTitleCache(); |
|
137 |
if (!generatedTitleCache.trim().equals(titleCacheStr)) { |
|
138 |
logger.warn(line + "The generated titleCache differs from the imported string : " + generatedTitleCache + " <> " + titleCacheStr + " will restore original titleCacheStr"); |
|
150 |
if (!taxonNameTitleCache.equals(titleCacheStr)) { |
|
151 |
logger.warn(line + "The generated titleCache differs from the imported string : " + taxonNameTitleCache + " <> " + titleCacheStr + " will restore original titleCacheStr"); |
|
139 | 152 |
doRestoreTitleCacheStr = true; |
140 | 153 |
} |
141 | 154 |
// Check Name |
142 |
if (!taxonName.getNameCache().trim().equals(nameStr)) { |
|
143 |
logger.warn(line + "parsed nameCache differs from " + NAMESTRING + " : " + taxonName.getNameCache() + " <> " + nameStr); |
|
155 |
String nameCache = taxonName.getNameCache(); |
|
156 |
if (!nameCache.trim().equals(nameStr)) { |
|
157 |
logger.warn(line + "parsed nameCache differs from " + NAMESTRING + " : " + nameCache + " <> " + nameStr); |
|
144 | 158 |
} |
145 | 159 |
|
146 | 160 |
// Author |
... | ... | |
156 | 170 |
|
157 | 171 |
// deduplicate |
158 | 172 |
replaceAuthorNamesAndNomRef(state, taxonName); |
173 |
} |
|
174 |
|
|
175 |
// Annotations |
|
176 |
if(!nameAnnotations.isEmpty()){ |
|
177 |
for(String text : nameAnnotations.keySet()){ |
|
178 |
taxonName.addAnnotation(Annotation.NewInstance(text, nameAnnotations.get(text), Language.DEFAULT())); |
|
179 |
} |
|
180 |
getNameService().save(taxonName); |
|
181 |
} |
|
182 |
|
|
183 |
// Namerelations |
|
184 |
if(!StringUtils.isEmpty(authorsSpelling)){ |
|
185 |
authorsSpelling = authorsSpelling.replaceFirst("Author's spelling:", "").replaceAll("\"", "").trim(); |
|
159 | 186 |
|
160 |
// Annotations |
|
161 |
if(!nameAnnotations.isEmpty()){ |
|
162 |
for(String text : nameAnnotations.keySet()){ |
|
163 |
taxonName.addAnnotation(Annotation.NewInstance(text, nameAnnotations.get(text), Language.DEFAULT())); |
|
187 |
String[] authorSpellingTokens = StringUtils.split(authorsSpelling, " "); |
|
188 |
String[] nameStrTokens = StringUtils.split(nameStr, " "); |
|
189 |
|
|
190 |
ArrayUtils.reverse(authorSpellingTokens); |
|
191 |
ArrayUtils.reverse(nameStrTokens); |
|
192 |
|
|
193 |
for (int i = 0; i < nameStrTokens.length; i++){ |
|
194 |
if(i < authorSpellingTokens.length){ |
|
195 |
nameStrTokens[i] = authorSpellingTokens[i]; |
|
164 | 196 |
} |
165 |
getNameService().save(taxonName); |
|
166 | 197 |
} |
198 |
ArrayUtils.reverse(nameStrTokens); |
|
199 |
|
|
200 |
String misspelledNameStr = StringUtils.join (nameStrTokens, ' '); |
|
201 |
// build the fullnameString of the misspelled name |
|
202 |
misspelledNameStr = taxonNameTitleCache.replace(nameStr, misspelledNameStr); |
|
203 |
|
|
204 |
TaxonNameBase misspelledName = (BotanicalName) nameParser.parseReferencedName(misspelledNameStr, NomenclaturalCode.ICNAFP, null); |
|
205 |
misspelledName.addRelationshipToName(taxonName, NameRelationshipType.MISSPELLING(), null); |
|
206 |
getNameService().save(misspelledName); |
|
167 | 207 |
} |
168 | 208 |
|
169 | 209 |
Reference sec = state.getConfig().getSecReference(); |
170 | 210 |
Taxon taxon = Taxon.NewInstance(taxonName, sec); |
211 |
|
|
212 |
// Markers |
|
213 |
if(isFossil){ |
|
214 |
taxon.addMarker(Marker.NewInstance(markerTypeFossil(), true)); |
|
215 |
} |
|
216 |
|
|
171 | 217 |
getTaxonService().save(taxon); |
172 | 218 |
if(higherTaxonNode != null){ |
173 | 219 |
higherTaxonNode.addChildTaxon(taxon, null, null); |
... | ... | |
260 | 306 |
} |
261 | 307 |
|
262 | 308 |
String reg_id = record.get(REGISTRATIONNO_PK); |
309 |
|
|
263 | 310 |
//higherTaxon |
264 |
TaxonNode higherTaxon = getHigherTaxon(record, (IAPTImportState)state); |
|
311 |
String higherTaxaString = record.get(HIGHERTAXON); |
|
312 |
boolean isFossil = false; |
|
313 |
if(higherTaxaString.startsWith("FOSSIL ")){ |
|
314 |
higherTaxaString = higherTaxaString.replace("FOSSIL ", ""); |
|
315 |
isFossil = true; |
|
316 |
} |
|
317 |
TaxonNode higherTaxon = getHigherTaxon(higherTaxaString, (IAPTImportState)state); |
|
265 | 318 |
|
266 | 319 |
//Taxon |
267 |
Taxon taxon = makeTaxon(record, state, higherTaxon, isSynonymOnly); |
|
320 |
Taxon taxon = makeTaxon(record, state, higherTaxon, isSynonymOnly, isFossil);
|
|
268 | 321 |
if (taxon == null && ! isSynonymOnly){ |
269 | 322 |
logger.warn(line + "taxon could not be created and is null"); |
270 | 323 |
return; |
... | ... | |
281 | 334 |
return; |
282 | 335 |
} |
283 | 336 |
|
284 |
private TaxonNode getHigherTaxon(HashMap<String, String> record, IAPTImportState state) {
|
|
285 |
String higherTaxaString = record.get(HIGHERTAXON); |
|
337 |
private TaxonNode getHigherTaxon(String higherTaxaString, IAPTImportState state) {
|
|
338 |
|
|
286 | 339 |
// higherTaxaString is like |
287 | 340 |
// - DICOTYLEDONES: LEGUMINOSAE: MIMOSOIDEAE |
288 | 341 |
// - FOSSIL DICOTYLEDONES: PROTEACEAE |
... | ... | |
376 | 429 |
return ref; |
377 | 430 |
} |
378 | 431 |
|
432 |
/**
 * Returns the marker type used by this import to flag fossil taxa.
 *
 * The instance is held in the {@code markerTypeFossil} field. When that field
 * is still {@code null}, a new {@code MarkerType} is created with
 * {@code MarkerType.NewInstance}, stored in the field and passed to
 * {@code getTermService().save(...)}; later calls return the stored instance.
 *
 * NOTE(review): the three {@code NewInstance} arguments are presumably
 * description, label and label abbreviation - confirm against the
 * {@code MarkerType} API.
 *
 * @return the marker type held in the {@code markerTypeFossil} field
 */
private MarkerType markerTypeFossil(){ |
|
433
if(this.markerTypeFossil == null){ |
|
434
markerTypeFossil = MarkerType.NewInstance("isFossilTaxon", "isFossil", null); |
|
435
getTermService().save(this.markerTypeFossil); |
|
436
} |
|
437
return markerTypeFossil; |
|
438
} |
|
439 |
|
|
379 | 440 |
|
380 | 441 |
|
381 | 442 |
} |
Also available in: Unified diff
ref #6026 multiple improvements