Revision 2fedea5a
Added by Andreas Müller about 1 year ago
app-import/src/main/java/eu/etaxonomy/cdm/io/caryo/CaryoSileneaeNameImport.java | ||
---|---|---|
8 | 8 |
*/ |
9 | 9 |
package eu.etaxonomy.cdm.io.caryo; |
10 | 10 |
|
11 |
import java.util.HashMap; |
|
12 |
import java.util.List; |
|
13 | 11 |
import java.util.Map; |
14 | 12 |
import java.util.UUID; |
15 | 13 |
|
... | ... | |
17 | 15 |
import org.apache.logging.log4j.Logger; |
18 | 16 |
import org.springframework.stereotype.Component; |
19 | 17 |
|
20 |
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade; |
|
21 | 18 |
import eu.etaxonomy.cdm.common.CdmUtils; |
19 |
import eu.etaxonomy.cdm.common.DoubleResult; |
|
20 |
import eu.etaxonomy.cdm.facade.DerivedUnitFacade; |
|
22 | 21 |
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException; |
23 | 22 |
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState; |
24 | 23 |
import eu.etaxonomy.cdm.model.common.Annotation; |
... | ... | |
32 | 31 |
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType; |
33 | 32 |
import eu.etaxonomy.cdm.model.reference.Reference; |
34 | 33 |
import eu.etaxonomy.cdm.model.term.DefinedTerm; |
35 |
import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException; |
|
36 | 34 |
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl; |
37 | 35 |
|
38 | 36 |
/** |
... | ... | |
45 | 43 |
private static final long serialVersionUID = 8931253645038029899L; |
46 | 44 |
private static final Logger logger = LogManager.getLogger(); |
47 | 45 |
|
48 |
private static final String NOMEN_ID = "nomen_id"; |
|
49 |
private static final String RANK = "rank"; |
|
46 |
private static final UUID uuidSileneaeInfoNameIdType = UUID.fromString("95ecbf6d-521d-447f-bae5-d82585ff3617"); |
|
47 |
|
|
48 |
private static final String NOMEN_ID = "nomen_ID"; |
|
49 |
private static final String RANK = "Rank"; |
|
50 | 50 |
private static final String GENUS = "Genus"; |
51 | 51 |
private static final String INFRAGEN_NAME = "Infragen_name"; |
52 | 52 |
private static final String SPECIES = "Species"; |
... | ... | |
72 | 72 |
@SuppressWarnings("unused") |
73 | 73 |
private static final String SynonymyReference_link = "SynonymyReference_link"; |
74 | 74 |
|
75 |
private Map<Integer, UUID> nameMapping = new HashMap<>(); |
|
76 |
|
|
77 |
private SimpleExcelTaxonImportState<CaryoSileneaeImportConfigurator> state; |
|
75 |
@Override |
|
76 |
protected String getWorksheetName(CaryoSileneaeImportConfigurator config) { |
|
77 |
return "Names"; |
|
78 |
} |
|
78 | 79 |
|
79 | 80 |
@Override |
80 | 81 |
protected void firstPass(SimpleExcelTaxonImportState<CaryoSileneaeImportConfigurator> state) { |
81 | 82 |
int line = state.getCurrentLine(); |
82 |
if ((line % 500) == 0){ |
|
83 |
newTransaction(state); |
|
84 |
System.out.println(line); |
|
85 |
} |
|
83 |
// if ((line % 500) == 0){
|
|
84 |
// newTransaction(state);
|
|
85 |
// System.out.println(line);
|
|
86 |
// }
|
|
86 | 87 |
|
87 |
this.state = state; |
|
88 | 88 |
Map<String, String> record = state.getOriginalRecord(); |
89 | 89 |
|
90 | 90 |
Integer nomenId = Integer.valueOf(getValue(record, NOMEN_ID)); |
... | ... | |
93 | 93 |
String infragenStr = getValue(record, INFRAGEN_NAME); |
94 | 94 |
String speciesStr = getValue(record, SPECIES); |
95 | 95 |
String infraspStr = getValue(record, INFRASP_EPITHET); |
96 |
String publicationStr = getValue(record, PUBLICATION); |
|
97 |
String publDateStr = getValue(record, PUBL_DATE); |
|
96 | 98 |
String authorsStr = getValue(record, AUTHORS); |
97 | 99 |
String ipniId = getValue(record, IPNI_ID); |
98 | 100 |
String notes = getValue(record, NOTES); |
... | ... | |
104 | 106 |
try { |
105 | 107 |
//create name |
106 | 108 |
Rank rank = state.getTransformer().getRankByKey(rankStr); |
109 |
if (rank == null) { |
|
110 |
logger.warn(row + "rank not recognized: " + rankStr); |
|
111 |
} |
|
107 | 112 |
TaxonName name = TaxonNameFactory.NewBotanicalInstance(rank); |
108 | 113 |
|
109 | 114 |
//fill simple |
110 | 115 |
name.setGenusOrUninomial(Ne(genusStr)); |
111 | 116 |
name.setInfraGenericEpithet(Ne(infragenStr)); |
117 |
if (speciesStr != null && speciesStr.startsWith("×")) { |
|
118 |
name.setBinomHybrid(true); |
|
119 |
speciesStr = speciesStr.replace("×", "").trim(); |
|
120 |
} |
|
112 | 121 |
name.setSpecificEpithet(Ne(speciesStr)); |
113 | 122 |
name.setInfraSpecificEpithet(Ne(infraspStr)); |
114 |
NonViralNameParserImpl.NewInstance().parseAuthors(name, authorsStr); |
|
123 |
try { |
|
124 |
NonViralNameParserImpl.NewInstance().parseAuthors(name, authorsStr); |
|
125 |
} catch (Exception e) { |
|
126 |
name.setAuthorshipCache(authorsStr, true); |
|
127 |
logger.warn(row + "authorship not parsable: " + authorsStr); |
|
128 |
} |
|
115 | 129 |
|
116 |
//TODO ??publication + PublDate |
|
130 |
//publication |
|
131 |
publicationStr = normalizePublication(publicationStr); |
|
132 |
if (isNotBlank(publicationStr) || isNotBlank(publDateStr)) { |
|
133 |
DoubleResult<String, String> publ = new DoubleResult<>(publicationStr, publDateStr); |
|
134 |
origPublicationMap.put(nomenId, publ); |
|
135 |
} |
|
117 | 136 |
|
118 | 137 |
//ipni ID |
119 | 138 |
if (isNotBlank(ipniId)) { |
120 | 139 |
name.addIdentifier(ipniId, DefinedTerm.IDENTIFIER_NAME_IPNI()); |
121 | 140 |
} |
122 | 141 |
|
142 |
//add ID |
|
143 |
DefinedTerm sileneaeInfoNameIdType = getIdentiferType(state, |
|
144 |
uuidSileneaeInfoNameIdType, null, null, null, null); |
|
145 |
name.addIdentifier(nomenId.toString(), sileneaeInfoNameIdType); |
|
146 |
|
|
123 | 147 |
//notes |
124 | 148 |
if (isNotBlank(notes)) { |
125 |
Annotation annotation = Annotation.NewDefaultLanguageInstance(notes); |
|
126 |
annotation.setAnnotationType(AnnotationType.TECHNICAL()); |
|
127 |
name.addAnnotation(annotation); |
|
149 |
handleNotes(name, notes, row); |
|
128 | 150 |
} |
129 | 151 |
|
130 |
//nominval flag |
|
152 |
//nom.inval flag
|
|
131 | 153 |
if (nomInvalFlag != null && nomInvalFlag.trim().equalsIgnoreCase("yes")) { |
132 | 154 |
Reference ref = null; |
133 | 155 |
name.addStatus(NomenclaturalStatusType.INVALID(), ref, null); |
... | ... | |
137 | 159 |
if (isNotBlank(typeSpecimenStr)) { |
138 | 160 |
DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.PreservedSpecimen); |
139 | 161 |
facade.setCollector(null); //just to create field unit and gathering event |
162 |
facade.innerFieldUnit().setTitleCache("Field Unit for: " + typeSpecimenStr, true); |
|
140 | 163 |
facade.innerDerivedUnit().setTitleCache(typeSpecimenStr, true); |
141 |
SpecimenTypeDesignationStatus status = null; //TODO
|
|
164 |
SpecimenTypeDesignationStatus status = SpecimenTypeDesignationStatus.UNSPECIFIC();
|
|
142 | 165 |
Reference ref = null; |
143 | 166 |
String originalInfo = null; |
144 | 167 |
name.addSpecimenTypeDesignation(facade.innerDerivedUnit(), status, ref, null, originalInfo, false, false); |
145 | 168 |
// save ?? |
146 | 169 |
} |
147 | 170 |
|
171 |
Reference sourceRef = getSourceReference(state); |
|
172 |
name.addImportSource(nomenId.toString(), "Names.nomen_ID", sourceRef, "row " + String.valueOf(line)); |
|
173 |
|
|
174 |
putToNameMap(nomenId, name); |
|
148 | 175 |
|
149 |
} catch (UndefinedTransformerMethodException | StringNotParsableException e) {
|
|
176 |
} catch (UndefinedTransformerMethodException e) { |
|
150 | 177 |
e.printStackTrace(); |
151 | 178 |
} |
152 | 179 |
} |
153 | 180 |
|
154 |
private String Ne(String genusStr) { |
|
155 |
return CdmUtils.Ne(genusStr); |
|
156 |
} |
|
157 |
|
|
158 |
private TaxonName dedupliateNameParts(TaxonName name) { |
|
159 |
if (state.getConfig().isDoDeduplicate()){ |
|
160 |
state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(name); |
|
181 |
private String normalizePublication(String publicationStr) { |
|
182 |
if (isBlank(publicationStr)) { |
|
183 |
return null; |
|
161 | 184 |
} |
162 |
return name; |
|
163 |
} |
|
164 |
|
|
165 |
private String getOtherAuthors(List<TaxonName> otherNames) { |
|
166 |
String result = ""; |
|
167 |
for (TaxonName name : otherNames){ |
|
168 |
result = CdmUtils.concat(";", result, name.getAuthorshipCache()); |
|
185 |
if ("-".equalsIgnoreCase(publicationStr)) { |
|
186 |
return null; |
|
187 |
} |
|
188 |
if ("?".equalsIgnoreCase(publicationStr)) { |
|
189 |
return null; |
|
169 | 190 |
} |
170 |
return result; |
|
191 |
if ("??".equalsIgnoreCase(publicationStr)) { |
|
192 |
return null; |
|
193 |
} |
|
194 |
if ("none".equalsIgnoreCase(publicationStr)) { |
|
195 |
return null; |
|
196 |
} |
|
197 |
return publicationStr; |
|
171 | 198 |
} |
172 | 199 |
|
200 |
private String Ne(String genusStr) { |
|
201 |
return CdmUtils.Ne(genusStr); |
|
202 |
} |
|
173 | 203 |
|
174 |
private void handleNomenclRemarkAndNameStatus(String nomenclaturalRemarks, String row, boolean isNewName, TaxonName name, |
|
175 |
List<NomenclaturalStatusType> statusTypes) { |
|
204 |
private String handleNotes(TaxonName name, String notes, String row) { |
|
176 | 205 |
|
177 | 206 |
NomenclaturalStatusType remarkType = null; |
178 |
NomenclaturalStatusType statusType = statusTypes.isEmpty()? null: statusTypes.iterator().next(); |
|
179 |
if (nomenclaturalRemarks == null){ |
|
207 |
if (notes == null){ |
|
180 | 208 |
//nothing to do |
181 |
}else if (", nom. illeg.".equals(nomenclaturalRemarks)){ |
|
182 |
remarkType = NomenclaturalStatusType.ILLEGITIMATE(); |
|
183 |
}else if (", nom. cons.".equals(nomenclaturalRemarks)){ |
|
184 |
remarkType = NomenclaturalStatusType.CONSERVED(); |
|
185 |
}else if (", nom. nud.".equals(nomenclaturalRemarks)){ |
|
209 |
}else if ("ined".equals(notes)){ |
|
210 |
remarkType = NomenclaturalStatusType.INED(); |
|
211 |
notes = null; |
|
212 |
}else if ("nom. utique rej.".equals(notes)){ |
|
213 |
remarkType = NomenclaturalStatusType.UTIQUE_REJECTED(); |
|
214 |
notes = null; |
|
215 |
}else if ("nomen nudum".equalsIgnoreCase(notes)){ |
|
216 |
remarkType = NomenclaturalStatusType.NUDUM(); |
|
217 |
notes = null; |
|
218 |
}else if (notes.startsWith("nomen nudum") || notes.startsWith("Nomen nudum") ){ |
|
186 | 219 |
remarkType = NomenclaturalStatusType.NUDUM(); |
187 |
}else if (", nom. provis.".equals(nomenclaturalRemarks)){ |
|
188 |
remarkType = NomenclaturalStatusType.PROVISIONAL(); |
|
189 |
}else if (", nom. rej.".equals(nomenclaturalRemarks)){ |
|
220 |
}else if (notes.startsWith("nom. illeg.")){ |
|
221 |
remarkType = NomenclaturalStatusType.ILLEGITIMATE(); |
|
222 |
}else if (notes.startsWith("nom. inval")){ |
|
223 |
remarkType = NomenclaturalStatusType.INVALID(); |
|
224 |
}else if ("Nom. rej.".equals(notes)){ |
|
190 | 225 |
remarkType = NomenclaturalStatusType.REJECTED(); |
191 |
}else if (", nom. subnud.".equals(nomenclaturalRemarks)){ |
|
192 |
remarkType = NomenclaturalStatusType.SUBNUDUM(); |
|
193 |
}else if (", nom. superfl.".equals(nomenclaturalRemarks)){ |
|
194 |
remarkType = NomenclaturalStatusType.SUPERFLUOUS(); |
|
195 |
}else if (", not validly publ.".equals(nomenclaturalRemarks)){ |
|
196 |
statusTypes.add(NomenclaturalStatusType.INVALID()); |
|
197 |
}else if (", opus utique oppr.".equals(nomenclaturalRemarks)){ |
|
198 |
statusTypes.add(NomenclaturalStatusType.OPUS_UTIQUE_OPPR()); |
|
199 |
}else { |
|
200 |
logger.warn(row + "Unhandled nomenclatural remark: " + nomenclaturalRemarks); |
|
226 |
notes = null; |
|
201 | 227 |
} |
202 | 228 |
|
203 |
NomenclaturalStatusType kewType = remarkType != null? remarkType : statusType; |
|
204 |
if (isNewName){ |
|
205 |
if(remarkType != null && statusType != null && !remarkType.equals(statusType)){ |
|
206 |
logger.warn(row + "Kew suggests 2 different nom. status. types for new name. The status from nomenclatural_remarks was taken."); |
|
207 |
} |
|
208 |
if (kewType != null){ |
|
209 |
name.addStatus(kewType, getSecRef(state), null); |
|
210 |
} |
|
211 |
}else{ |
|
212 |
NomenclaturalStatusType existingType = null; |
|
213 |
if (!name.getStatus().isEmpty()){ |
|
214 |
existingType = name.getStatus().iterator().next().getType(); |
|
215 |
} |
|
216 |
if (existingType != null && kewType != null){ |
|
217 |
if (!existingType.equals(kewType)){ |
|
218 |
logger.warn(row + "Existing name status "+existingType.getTitleCache()+" differs from Kew status " + kewType.getTitleCache() + ". Key status ignored"); |
|
219 |
} |
|
220 |
}else if (existingType != null && kewType == null){ |
|
221 |
logger.warn(row + "Info: Existing name has a name status "+existingType.getTitleCache()+" but Kew name has no status. Existing status kept."); |
|
222 |
}else if (existingType == null && kewType != null){ |
|
223 |
if(remarkType != null && statusType != null && !remarkType.equals(statusType)){ |
|
224 |
logger.warn(row + "Existing name has no status while Kew name suggests a status (but 2 different status form status and nomenclatural_remarks field)."); |
|
225 |
}else{ |
|
226 |
logger.warn(row + "Existing name has no status while Kew name suggests a status ("+kewType.getTitleCache()+"). Kew status ignored."); |
|
229 |
//annotation |
|
230 |
if (isNotBlank(notes)) { |
|
231 |
Annotation annotation = Annotation.NewDefaultLanguageInstance(notes); |
|
232 |
annotation.setAnnotationType(AnnotationType.TECHNICAL()); |
|
233 |
name.addAnnotation(annotation); |
|
234 |
} |
|
235 |
|
|
236 |
//nom. status. |
|
237 |
if (remarkType != null) { |
|
238 |
if (!name.hasStatus(remarkType)) { |
|
239 |
|
|
240 |
Reference ref = null; |
|
241 |
name.addStatus(remarkType, ref, null); |
|
242 |
if (name.getStatus().size() > 1) { |
|
243 |
logger.warn(row + "name has >1 status: " + name.getFullTitleCache()); |
|
227 | 244 |
} |
228 | 245 |
} |
229 | 246 |
} |
247 |
|
|
248 |
return notes; |
|
230 | 249 |
} |
231 | 250 |
|
232 | 251 |
@Override |
... | ... | |
239 | 258 |
Integer typeSpeciesId = getInt(getValue(record, TYPE_SPECIES_LINK)); |
240 | 259 |
|
241 | 260 |
String row = String.valueOf(line) + "("+nomenId+"): "; |
242 |
if ((line % 500) == 0){ |
|
243 |
newTransaction(state); |
|
244 |
System.out.println(line); |
|
245 |
} |
|
261 |
// if ((line % 500) == 0){
|
|
262 |
// newTransaction(state);
|
|
263 |
// System.out.println(line);
|
|
264 |
// }
|
|
246 | 265 |
|
247 |
TaxonName name = getName(state, nomenId);
|
|
266 |
TaxonName name = getName(nomenId); |
|
248 | 267 |
if (name == null) { |
249 | 268 |
logger.warn(row + "Name does not exist"); |
250 | 269 |
return; |
251 | 270 |
} |
252 | 271 |
|
253 | 272 |
//basionym |
254 |
if (basionymId != null) { |
|
255 |
TaxonName basionym = getName(state, basionymId);
|
|
273 |
if (basionymId != null && !basionymId.equals(nomenId)) {
|
|
274 |
TaxonName basionym = getName(basionymId); |
|
256 | 275 |
if (basionym == null) { |
257 | 276 |
logger.warn(row + "basionym does not exist"); |
258 | 277 |
}else { |
... | ... | |
262 | 281 |
|
263 | 282 |
//type name |
264 | 283 |
if (typeSpeciesId != null) { |
265 |
TaxonName typeSpecies = getName(state, typeSpeciesId);
|
|
284 |
TaxonName typeSpecies = getName(typeSpeciesId); |
|
266 | 285 |
if (typeSpecies == null) { |
267 | 286 |
logger.warn(row + "typeSpecies does not exist"); |
268 | 287 |
}else { |
269 | 288 |
Reference ref = null; |
270 |
NameTypeDesignationStatus status = null; //TODO
|
|
289 |
NameTypeDesignationStatus status = null; // NameTypeDesignationStatus.NOT_APPLICABLE(); //TODO minor NameTypeDesignationStatus
|
|
271 | 290 |
name.addNameTypeDesignation(typeSpecies, ref, null, null, status, false); |
272 | 291 |
} |
273 | 292 |
} |
Also available in: Unified diff
ref #10242 final version of Sileneae import (before synonym cleanup)