1
|
/**
|
2
|
* Copyright (C) 2013 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.io.taxonx2013;
|
10
|
|
11
|
import java.awt.Dimension;
|
12
|
import java.io.StringWriter;
|
13
|
import java.util.ArrayList;
|
14
|
import java.util.Collections;
|
15
|
import java.util.HashMap;
|
16
|
import java.util.HashSet;
|
17
|
import java.util.List;
|
18
|
import java.util.Map;
|
19
|
import java.util.Scanner;
|
20
|
import java.util.Set;
|
21
|
import java.util.UUID;
|
22
|
|
23
|
import javax.swing.JFrame;
|
24
|
import javax.swing.JOptionPane;
|
25
|
import javax.swing.JScrollPane;
|
26
|
import javax.swing.JTextArea;
|
27
|
import javax.swing.UIManager;
|
28
|
import javax.xml.transform.OutputKeys;
|
29
|
import javax.xml.transform.Transformer;
|
30
|
import javax.xml.transform.TransformerException;
|
31
|
import javax.xml.transform.TransformerFactory;
|
32
|
import javax.xml.transform.TransformerFactoryConfigurationError;
|
33
|
import javax.xml.transform.dom.DOMSource;
|
34
|
import javax.xml.transform.stream.StreamResult;
|
35
|
|
36
|
import org.apache.commons.lang.StringUtils;
|
37
|
import org.apache.log4j.Logger;
|
38
|
import org.w3c.dom.Node;
|
39
|
import org.w3c.dom.NodeList;
|
40
|
|
41
|
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
|
42
|
import eu.etaxonomy.cdm.api.service.IAgentService;
|
43
|
import eu.etaxonomy.cdm.io.specimen.UnitsGatheringArea;
|
44
|
import eu.etaxonomy.cdm.io.specimen.UnitsGatheringEvent;
|
45
|
import eu.etaxonomy.cdm.model.agent.AgentBase;
|
46
|
import eu.etaxonomy.cdm.model.agent.Person;
|
47
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
48
|
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
|
49
|
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
|
50
|
import eu.etaxonomy.cdm.model.common.Language;
|
51
|
import eu.etaxonomy.cdm.model.common.TimePeriod;
|
52
|
import eu.etaxonomy.cdm.model.description.Feature;
|
53
|
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
54
|
import eu.etaxonomy.cdm.model.description.TextData;
|
55
|
import eu.etaxonomy.cdm.model.location.NamedArea;
|
56
|
import eu.etaxonomy.cdm.model.name.INonViralName;
|
57
|
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
|
58
|
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
|
59
|
import eu.etaxonomy.cdm.model.name.Rank;
|
60
|
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
|
61
|
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
|
62
|
import eu.etaxonomy.cdm.model.name.TaxonName;
|
63
|
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
|
64
|
import eu.etaxonomy.cdm.model.occurrence.GatheringEvent;
|
65
|
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
|
66
|
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
|
67
|
import eu.etaxonomy.cdm.model.reference.IBook;
|
68
|
import eu.etaxonomy.cdm.model.reference.IBookSection;
|
69
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
70
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
71
|
import eu.etaxonomy.cdm.model.taxon.Classification;
|
72
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
73
|
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
|
74
|
import eu.etaxonomy.cdm.persistence.dto.UuidAndTitleCache;
|
75
|
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
|
76
|
import eu.etaxonomy.cdm.strategy.parser.ParserProblem;
|
77
|
|
78
|
|
79
|
/**
|
80
|
* @author pkelbert
|
81
|
* @since 2 avr. 2013
|
82
|
*
|
83
|
*/
|
84
|
public class TaxonXExtractor {
|
85
|
|
86
|
protected TaxonXImport importer;
|
87
|
protected TaxonXImportState state2;
|
88
|
private final Map<String,String> namesAsked = new HashMap<String, String>();
|
89
|
private final Map<String,Rank>ranksAsked = new HashMap<String, Rank>();
|
90
|
|
91
|
Logger logger = Logger.getLogger(TaxonXExtractor.class);
|
92
|
|
93
|
public class ReferenceBuilder{
|
94
|
private int nbRef=0;
|
95
|
private boolean foundBibref=false;
|
96
|
private final TaxonXAddSources sourceHandler;
|
97
|
|
98
|
/**
|
99
|
* @param sourceHandler
|
100
|
*/
|
101
|
public ReferenceBuilder(TaxonXAddSources sourceHandler) {
|
102
|
this.sourceHandler=sourceHandler;
|
103
|
}
|
104
|
|
105
|
/**
|
106
|
* @return the foundBibref
|
107
|
*/
|
108
|
public boolean isFoundBibref() {
|
109
|
return foundBibref;
|
110
|
}
|
111
|
|
112
|
/**
|
113
|
* @param foundBibref the foundBibref to set
|
114
|
*/
|
115
|
public void setFoundBibref(boolean foundBibref) {
|
116
|
this.foundBibref = foundBibref;
|
117
|
}
|
118
|
|
119
|
|
120
|
/**
|
121
|
* @param ref
|
122
|
* @param refMods
|
123
|
*/
|
124
|
public void builReference(String mref, String treatmentMainName, NomenclaturalCode nomenclaturalCode,
|
125
|
Taxon acceptedTaxon, Reference refMods) {
|
126
|
// System.out.println("builReference "+mref);
|
127
|
this.setFoundBibref(true);
|
128
|
|
129
|
String ref= mref;
|
130
|
if ( (ref.endsWith(";") ||ref.endsWith(",") ) && ((ref.length())>1)) {
|
131
|
ref=ref.substring(0, ref.length()-1)+".";
|
132
|
}
|
133
|
if (ref.startsWith(treatmentMainName) && !ref.endsWith(treatmentMainName)) {
|
134
|
ref=ref.replace(treatmentMainName, "");
|
135
|
ref=ref.trim();
|
136
|
while (ref.startsWith(".") || ref.startsWith(",")) {
|
137
|
ref=ref.replace(".","").replace(",","").trim();
|
138
|
}
|
139
|
}
|
140
|
|
141
|
// logger.info("Current reference :"+nbRef+", "+ref+", "+treatmentMainName+"--"+ref.indexOf(treatmentMainName));
|
142
|
Reference reference = ReferenceFactory.newGeneric();
|
143
|
reference.setTitleCache(ref, true);
|
144
|
|
145
|
//only add the first one if there is no nomenclatural reference yet
|
146
|
if (nbRef==0){
|
147
|
if(acceptedTaxon.getName().getNomenclaturalReference()==null){
|
148
|
acceptedTaxon.getName().setNomenclaturalReference(reference);
|
149
|
sourceHandler.addSource(refMods, acceptedTaxon);
|
150
|
}
|
151
|
}
|
152
|
//add all other references as Feature.Citation
|
153
|
TaxonDescription taxonDescription =importer.getTaxonDescription(acceptedTaxon, false, true);
|
154
|
acceptedTaxon.addDescription(taxonDescription);
|
155
|
sourceHandler.addSource(refMods, acceptedTaxon);
|
156
|
|
157
|
TextData textData = TextData.NewInstance(Feature.CITATION());
|
158
|
Language language = Language.DEFAULT();
|
159
|
textData.putText(language, ref);
|
160
|
sourceHandler.addSource(reference, textData,acceptedTaxon.getName(),refMods);
|
161
|
taxonDescription.addElement(textData);
|
162
|
|
163
|
sourceHandler.addSource(refMods, taxonDescription);
|
164
|
|
165
|
importer.getTaxonService().saveOrUpdate(acceptedTaxon);
|
166
|
// logger.warn("BWAAHHHH: "+nameToBeFilled.getParsingProblems()+", "+ref);
|
167
|
nbRef++;
|
168
|
|
169
|
}
|
170
|
|
171
|
}
|
172
|
|
173
|
public class MySpecimenOrObservation{
|
174
|
String descr="";
|
175
|
DerivedUnit derivedUnitBase=null;
|
176
|
|
177
|
public String getDescr() {
|
178
|
return descr;
|
179
|
}
|
180
|
public void setDescr(String descr) {
|
181
|
this.descr = descr;
|
182
|
}
|
183
|
public DerivedUnit getDerivedUnitBase() {
|
184
|
return derivedUnitBase;
|
185
|
}
|
186
|
public void setDerivedUnitBase(DerivedUnit derivedUnitBase) {
|
187
|
this.derivedUnitBase = derivedUnitBase;
|
188
|
}
|
189
|
|
190
|
|
191
|
|
192
|
|
193
|
}
|
194
|
|
195
|
/**
|
196
|
* @param item
|
197
|
* @return
|
198
|
*/
|
199
|
@SuppressWarnings({ "unused", "rawtypes" })
|
200
|
protected MySpecimenOrObservation extractSpecimenOrObservation(Node specimenObservationNode, DerivedUnit derivedUnitBase,
|
201
|
SpecimenOrObservationType defaultAssociation, TaxonName typifiableName) {
|
202
|
String country=null;
|
203
|
String locality=null;
|
204
|
String stateprov=null;
|
205
|
String collector=null;
|
206
|
String fieldNumber=null;
|
207
|
Double latitude=null,longitude=null;
|
208
|
TimePeriod tp =null;
|
209
|
String day,month,year="";
|
210
|
String descr="not available";
|
211
|
String type="";
|
212
|
boolean asso=false;
|
213
|
NodeList eventContent =null;
|
214
|
// create facade
|
215
|
DerivedUnitFacade derivedUnitFacade = null;
|
216
|
|
217
|
UnitsGatheringEvent unitsGatheringEvent;
|
218
|
UnitsGatheringArea unitsGatheringArea;
|
219
|
DefinedTermBase areaCountry;
|
220
|
|
221
|
MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
|
222
|
|
223
|
NodeList xmldata= specimenObservationNode.getChildNodes();
|
224
|
for (int n=0;n<xmldata.getLength();n++){
|
225
|
eventContent=xmldata.item(n).getChildNodes();
|
226
|
if (xmldata.item(n).getNodeName().equalsIgnoreCase("tax:xmldata")){
|
227
|
asso=true;
|
228
|
country=null;
|
229
|
locality=null;
|
230
|
stateprov=null;
|
231
|
collector=null;
|
232
|
fieldNumber=null;
|
233
|
latitude=null;
|
234
|
longitude=null;
|
235
|
day="";
|
236
|
month="";
|
237
|
year="";
|
238
|
type="";
|
239
|
for (int j=0;j<eventContent.getLength();j++){
|
240
|
if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:country")){
|
241
|
country=eventContent.item(j).getTextContent().trim();
|
242
|
}
|
243
|
else if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:locality")){
|
244
|
locality=eventContent.item(j).getTextContent().trim();
|
245
|
}
|
246
|
else if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:stateprovince")){
|
247
|
stateprov=eventContent.item(j).getTextContent().trim();
|
248
|
}
|
249
|
else if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:collector")){
|
250
|
collector=eventContent.item(j).getTextContent().trim();
|
251
|
}
|
252
|
else if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:yearcollected")){
|
253
|
year=eventContent.item(j).getTextContent().trim();
|
254
|
}
|
255
|
else if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:monthcollected")){
|
256
|
month=eventContent.item(j).getTextContent().trim();
|
257
|
}
|
258
|
else if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:daycollected")){
|
259
|
day=eventContent.item(j).getTextContent().trim();
|
260
|
}
|
261
|
else if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:decimallongitude")){
|
262
|
String tmp = eventContent.item(j).getTextContent().trim();
|
263
|
try{longitude=Double.valueOf(tmp);}catch(Exception e){logger.warn("longitude is not a number");}
|
264
|
}
|
265
|
else if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:decimallatitude")){
|
266
|
String tmp = eventContent.item(j).getTextContent().trim();
|
267
|
try{latitude=Double.valueOf(tmp);}catch(Exception e){logger.warn("latitude is not a number");}
|
268
|
}else if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:TypeStatus")){
|
269
|
type = eventContent.item(j).getTextContent().trim();
|
270
|
}else if(eventContent.item(j).getNodeName().equalsIgnoreCase("#text") && StringUtils.isBlank(eventContent.item(j).getTextContent())){
|
271
|
//do nothing
|
272
|
}
|
273
|
else {
|
274
|
logger.info("UNEXTRACTED FIELD FOR SPECIMEN "+eventContent.item(j).getNodeName()+", "+eventContent.item(j).getTextContent()) ;
|
275
|
}
|
276
|
}
|
277
|
if (!day.isEmpty() || !month.isEmpty() || !year.isEmpty()){
|
278
|
try{
|
279
|
if (!year.isEmpty()) {
|
280
|
tp = TimePeriod.NewInstance(Integer.parseInt(year));
|
281
|
if (!month.isEmpty()) {
|
282
|
tp.setStartMonth(Integer.parseInt(month));
|
283
|
if (!day.isEmpty()) {
|
284
|
tp.setStartDay(Integer.parseInt(day));
|
285
|
}
|
286
|
}
|
287
|
|
288
|
}
|
289
|
}catch(Exception e){
|
290
|
logger.warn("Collection date error "+e);
|
291
|
}
|
292
|
}
|
293
|
}
|
294
|
if(xmldata.item(n).getNodeName().equalsIgnoreCase("#text")){
|
295
|
descr=xmldata.item(n).getTextContent().replaceAll(";","").trim();
|
296
|
if (descr.length()>1 && containsDistinctLetters(descr)) {
|
297
|
specimenOrObservation.setDescr(descr);
|
298
|
asso=true;
|
299
|
}
|
300
|
}
|
301
|
if(xmldata.item(n).getNodeName().equalsIgnoreCase("tax:p")){
|
302
|
descr=xmldata.item(n).getTextContent().replaceAll(";","").trim();
|
303
|
if (descr.length()>1 && containsDistinctLetters(descr)) {
|
304
|
specimenOrObservation.setDescr(descr);
|
305
|
asso=true;
|
306
|
}
|
307
|
}
|
308
|
}
|
309
|
// if(asso && descr.length()>1){
|
310
|
|
311
|
// logger.info("DESCR: "+descr);
|
312
|
if (!type.isEmpty()) {
|
313
|
if (!containsDistinctLetters(type)) {
|
314
|
type="no description text";
|
315
|
}
|
316
|
derivedUnitFacade = getFacade(type.replaceAll(";",""), defaultAssociation);
|
317
|
SpecimenTypeDesignation designation = SpecimenTypeDesignation.NewInstance();
|
318
|
|
319
|
if (typifiableName != null){
|
320
|
typifiableName.addTypeDesignation(designation, true);
|
321
|
}else{
|
322
|
logger.warn("No typifiable name available");
|
323
|
}
|
324
|
SpecimenTypeDesignationStatus stds= getSpecimenTypeDesignationStatusByKey(type);
|
325
|
if (stds !=null) {
|
326
|
stds = (SpecimenTypeDesignationStatus) importer.getTermService().find(stds.getUuid());
|
327
|
}
|
328
|
|
329
|
designation.setTypeStatus(stds);
|
330
|
derivedUnitFacade.innerDerivedUnit().addSpecimenTypeDesignation(designation);
|
331
|
|
332
|
derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
|
333
|
// System.out.println("derivedUnitBase: "+derivedUnitBase);
|
334
|
// designation.setTypeSpecimen(derivedUnitBase);
|
335
|
// TaxonName name = taxon.getName();
|
336
|
// name.addTypeDesignation(designation, true);
|
337
|
} else {
|
338
|
if (!containsDistinctLetters(descr.replaceAll(";",""))) {
|
339
|
descr="no description text";
|
340
|
}
|
341
|
|
342
|
derivedUnitFacade = getFacade(descr.replaceAll(";",""), defaultAssociation);
|
343
|
derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
|
344
|
// System.out.println("derivedUnitBase2: "+derivedUnitBase);
|
345
|
}
|
346
|
|
347
|
unitsGatheringEvent = new UnitsGatheringEvent(importer.getTermService(), locality,collector,longitude, latitude,
|
348
|
state2.getConfig(),importer.getAgentService());
|
349
|
|
350
|
if(tp!=null) {
|
351
|
unitsGatheringEvent.setGatheringDate(tp);
|
352
|
}
|
353
|
|
354
|
// country
|
355
|
unitsGatheringArea = new UnitsGatheringArea();
|
356
|
unitsGatheringArea.setParams(null, country, state2.getConfig(), importer.getTermService(), importer.getOccurrenceService(), importer.getVocabularyService());
|
357
|
//TODO other areas
|
358
|
if (StringUtils.isNotBlank(stateprov)){
|
359
|
Map<String, String> namedAreas = new HashMap<String, String>();
|
360
|
namedAreas.put(stateprov, null);
|
361
|
unitsGatheringArea.setAreaNames(namedAreas, state2.getConfig(), importer.getTermService(), importer.getVocabularyService());
|
362
|
}
|
363
|
|
364
|
areaCountry = unitsGatheringArea.getCountry();
|
365
|
|
366
|
// // other areas
|
367
|
// unitsGatheringArea = new UnitsGatheringArea(namedAreaList,dataHolder.getTermService());
|
368
|
// ArrayList<DefinedTermBase> nas = unitsGatheringArea.getAreas();
|
369
|
// for (DefinedTermBase namedArea : nas) {
|
370
|
// unitsGatheringEvent.addArea(namedArea);
|
371
|
// }
|
372
|
|
373
|
// copy gathering event to facade
|
374
|
GatheringEvent gatheringEvent = unitsGatheringEvent.getGatheringEvent();
|
375
|
derivedUnitFacade.setGatheringEvent(gatheringEvent);
|
376
|
derivedUnitFacade.setLocality(gatheringEvent.getLocality());
|
377
|
derivedUnitFacade.setExactLocation(gatheringEvent.getExactLocation());
|
378
|
derivedUnitFacade.setCollector(gatheringEvent.getCollector());
|
379
|
derivedUnitFacade.setCountry((NamedArea)areaCountry);
|
380
|
|
381
|
for(DefinedTermBase<?> area:unitsGatheringArea.getAreas()){
|
382
|
derivedUnitFacade.addCollectingArea((NamedArea) area);
|
383
|
}
|
384
|
// derivedUnitFacade.addCollectingAreas(unitsGatheringArea.getAreas());
|
385
|
|
386
|
// add fieldNumber
|
387
|
if (fieldNumber != null) {
|
388
|
derivedUnitFacade.setFieldNumber(fieldNumber);
|
389
|
}
|
390
|
specimenOrObservation.setDerivedUnitBase(derivedUnitBase);
|
391
|
// }
|
392
|
return specimenOrObservation;
|
393
|
}
|
394
|
|
395
|
|
396
|
private SpecimenTypeDesignationStatus getSpecimenTypeDesignationStatusByKey(
|
397
|
String key) {
|
398
|
if (key == null) {
|
399
|
return null;
|
400
|
} else if (key.matches("(?i)(T|Type)")) {
|
401
|
return SpecimenTypeDesignationStatus.TYPE();
|
402
|
} else if (key.matches("(?i)(HT|Holotype)")) {
|
403
|
return SpecimenTypeDesignationStatus.HOLOTYPE();
|
404
|
} else if (key.matches("(?i)(LT|Lectotype)")) {
|
405
|
return SpecimenTypeDesignationStatus.LECTOTYPE();
|
406
|
} else if (key.matches("(?i)(NT|Neotype)")) {
|
407
|
return SpecimenTypeDesignationStatus.NEOTYPE();
|
408
|
} else if (key.matches("(?i)(ST|Syntype)")) {
|
409
|
return SpecimenTypeDesignationStatus.SYNTYPE();
|
410
|
} else if (key.matches("(?i)(ET|Epitype)")) {
|
411
|
return SpecimenTypeDesignationStatus.EPITYPE();
|
412
|
} else if (key.matches("(?i)(IT|Isotype)")) {
|
413
|
return SpecimenTypeDesignationStatus.ISOTYPE();
|
414
|
} else if (key.matches("(?i)(ILT|Isolectotype)")) {
|
415
|
return SpecimenTypeDesignationStatus.ISOLECTOTYPE();
|
416
|
} else if (key.matches("(?i)(INT|Isoneotype)")) {
|
417
|
return SpecimenTypeDesignationStatus.ISONEOTYPE();
|
418
|
} else if (key.matches("(?i)(IET|Isoepitype)")) {
|
419
|
return SpecimenTypeDesignationStatus.ISOEPITYPE();
|
420
|
} else if (key.matches("(?i)(PT|Paratype)")) {
|
421
|
return SpecimenTypeDesignationStatus.PARATYPE();
|
422
|
} else if (key.matches("(?i)(PLT|Paralectotype)")) {
|
423
|
return SpecimenTypeDesignationStatus.PARALECTOTYPE();
|
424
|
} else if (key.matches("(?i)(PNT|Paraneotype)")) {
|
425
|
return SpecimenTypeDesignationStatus.PARANEOTYPE();
|
426
|
} else if (key.matches("(?i)(unsp.|Unspecified)")) {
|
427
|
return SpecimenTypeDesignationStatus.UNSPECIFIC();
|
428
|
} else if (key.matches("(?i)(2LT|Second Step Lectotype)")) {
|
429
|
return SpecimenTypeDesignationStatus.SECOND_STEP_LECTOTYPE();
|
430
|
} else if (key.matches("(?i)(2NT|Second Step Neotype)")) {
|
431
|
return SpecimenTypeDesignationStatus.SECOND_STEP_NEOTYPE();
|
432
|
} else if (key.matches("(?i)(OM|Original Material)")) {
|
433
|
return SpecimenTypeDesignationStatus.ORIGINAL_MATERIAL();
|
434
|
} else if (key.matches("(?i)(IcT|Iconotype)")) {
|
435
|
return SpecimenTypeDesignationStatus.ICONOTYPE();
|
436
|
} else if (key.matches("(?i)(PT|Phototype)")) {
|
437
|
return SpecimenTypeDesignationStatus.PHOTOTYPE();
|
438
|
} else if (key.matches("(?i)(IST|Isosyntype)")) {
|
439
|
return SpecimenTypeDesignationStatus.ISOSYNTYPE();
|
440
|
} else {
|
441
|
return null;
|
442
|
}
|
443
|
}
|
444
|
protected DerivedUnitFacade getFacade(String recordBasis, SpecimenOrObservationType defaultAssoc) {
|
445
|
// System.out.println("getFacade() for "+recordBasis+", defaultassociation: "+defaultAssoc);
|
446
|
SpecimenOrObservationType type = null;
|
447
|
|
448
|
// create specimen
|
449
|
if (recordBasis != null) {
|
450
|
String recordBasisL = recordBasis.toLowerCase();
|
451
|
if (recordBasisL.startsWith("specimen") || recordBasisL.contains("specimen") || recordBasisL.contains("type")) {// specimen
|
452
|
type = SpecimenOrObservationType.PreservedSpecimen;
|
453
|
}
|
454
|
if (recordBasisL.startsWith("observation")) {
|
455
|
type = SpecimenOrObservationType.Observation;
|
456
|
}
|
457
|
if (recordBasisL.contains("fossil")) {
|
458
|
type = SpecimenOrObservationType.Fossil;
|
459
|
}
|
460
|
|
461
|
if (recordBasisL.startsWith("living")) {
|
462
|
type = SpecimenOrObservationType.LivingSpecimen;
|
463
|
}
|
464
|
if (type == null) {
|
465
|
logger.info("The basis of record does not seem to be known: *" + recordBasisL+"*");
|
466
|
type = defaultAssoc;
|
467
|
}
|
468
|
// TODO fossils?
|
469
|
} else {
|
470
|
logger.info("The basis of record is null");
|
471
|
type = defaultAssoc;
|
472
|
}
|
473
|
DerivedUnitFacade derivedUnitFacade = DerivedUnitFacade.NewInstance(type);
|
474
|
return derivedUnitFacade;
|
475
|
}
|
476
|
|
477
|
|
478
|
|
479
|
@SuppressWarnings("rawtypes")
|
480
|
protected Feature makeFeature(SpecimenOrObservationBase unit) {
|
481
|
if (unit == null){
|
482
|
return null;
|
483
|
}
|
484
|
SpecimenOrObservationType type = unit.getRecordBasis();
|
485
|
|
486
|
if (type.isFeatureObservation()){
|
487
|
return Feature.OBSERVATION();
|
488
|
}else if (type.isPreservedSpecimen() ||
|
489
|
type == SpecimenOrObservationType.LivingSpecimen ||
|
490
|
type == SpecimenOrObservationType.OtherSpecimen
|
491
|
){
|
492
|
return Feature.SPECIMEN();
|
493
|
}else if (type == SpecimenOrObservationType.Unknown ||
|
494
|
type == SpecimenOrObservationType.DerivedUnit
|
495
|
) {
|
496
|
return Feature.INDIVIDUALS_ASSOCIATION();
|
497
|
}
|
498
|
logger.warn("No feature defined for derived unit class: "
|
499
|
+ unit.getClass().getSimpleName());
|
500
|
return null;
|
501
|
}
|
502
|
|
503
|
|
504
|
/** Comma used as separator string. */
protected static final String SPLITTER = ",";
|
505
|
|
506
|
|
507
|
protected int askQuestion(String question){
|
508
|
Scanner scan = new Scanner(System.in);
|
509
|
logger.info(question);
|
510
|
int index = scan.nextInt();
|
511
|
return index;
|
512
|
}
|
513
|
|
514
|
|
515
|
/**
|
516
|
* @param reftype
|
517
|
* @return
|
518
|
*/
|
519
|
protected Reference getReferenceWithType(int reftype) {
|
520
|
Reference ref = null;
|
521
|
switch (reftype) {
|
522
|
case 1:
|
523
|
ref = ReferenceFactory.newGeneric();
|
524
|
break;
|
525
|
case 2:
|
526
|
IBook tmp= ReferenceFactory.newBook();
|
527
|
ref = (Reference)tmp;
|
528
|
break;
|
529
|
case 3:
|
530
|
ref = ReferenceFactory.newArticle();
|
531
|
break;
|
532
|
case 4:
|
533
|
IBookSection tmp2 = ReferenceFactory.newBookSection();
|
534
|
ref = (Reference)tmp2;
|
535
|
break;
|
536
|
case 5:
|
537
|
ref = ReferenceFactory.newJournal();
|
538
|
break;
|
539
|
case 6:
|
540
|
ref = ReferenceFactory.newPrintSeries();
|
541
|
break;
|
542
|
case 7:
|
543
|
ref = ReferenceFactory.newThesis();
|
544
|
break;
|
545
|
default:
|
546
|
break;
|
547
|
}
|
548
|
return ref;
|
549
|
}
|
550
|
/**
|
551
|
* @param unitsList
|
552
|
* @param state
|
553
|
*/
|
554
|
protected void prepareCollectors(TaxonXImportState state,IAgentService agentService) {
|
555
|
// logger.info("PREPARE COLLECTORS");
|
556
|
List<String> collectors = new ArrayList<String>();
|
557
|
String tmp;
|
558
|
List<String> collectorsU = new ArrayList<String>(new HashSet<String>(collectors));
|
559
|
Set<UUID> uuids = new HashSet<UUID>();
|
560
|
|
561
|
//existing persons in DB
|
562
|
List<UuidAndTitleCache<Person>> hiberPersons = agentService.getPersonUuidAndTitleCache();
|
563
|
Map<String,Person> titleCachePerson = new HashMap<String, Person>();
|
564
|
uuids = new HashSet<UUID>();
|
565
|
for (UuidAndTitleCache<Person> hibernateP:hiberPersons){
|
566
|
uuids.add(hibernateP.getUuid());
|
567
|
}
|
568
|
|
569
|
if (!uuids.isEmpty()){
|
570
|
List<AgentBase> existingPersons = agentService.find(uuids);
|
571
|
for (AgentBase existingP:existingPersons){
|
572
|
titleCachePerson.put(existingP.getTitleCache(),CdmBase.deproxy(existingP, Person.class));
|
573
|
}
|
574
|
}
|
575
|
|
576
|
Map<String,UUID> personMap = new HashMap<String, UUID>();
|
577
|
for (UuidAndTitleCache<Person> person:hiberPersons){
|
578
|
personMap.put(person.getTitleCache(), person.getUuid());
|
579
|
}
|
580
|
|
581
|
java.util.Collection<AgentBase> personToadd = new ArrayList<AgentBase>();
|
582
|
|
583
|
for (String collector:collectorsU){
|
584
|
Person p = Person.NewInstance();
|
585
|
p.setTitleCache(collector,true);
|
586
|
if (!personMap.containsKey(p.getTitleCache())){
|
587
|
personToadd.add(p);
|
588
|
}
|
589
|
}
|
590
|
|
591
|
if(!personToadd.isEmpty()){
|
592
|
Map<UUID, AgentBase> uuuidPerson = agentService.save(personToadd);
|
593
|
for (UUID u:uuuidPerson.keySet()){
|
594
|
titleCachePerson.put(uuuidPerson.get(u).getTitleCache(), CdmBase.deproxy(uuuidPerson.get(u), Person.class));
|
595
|
}
|
596
|
}
|
597
|
|
598
|
state.getConfig().setPersons(titleCachePerson);
|
599
|
}
|
600
|
|
601
|
/**
|
602
|
* @param name
|
603
|
* @return
|
604
|
*/
|
605
|
protected String getFullReference(String name, List<ParserProblem> problems) {
|
606
|
// logger.info("getFullReference for "+ name);
|
607
|
JTextArea textArea = new JTextArea("Complete the reference or the name '"+name+"'.\nThe current problem is "+StringUtils.join(problems,"--"));
|
608
|
JScrollPane scrollPane = new JScrollPane(textArea);
|
609
|
textArea.setLineWrap(true);
|
610
|
textArea.setWrapStyleWord(true);
|
611
|
scrollPane.setPreferredSize( new Dimension( 700, 70 ) );
|
612
|
|
613
|
// JFrame frame = new JFrame("I have a question");
|
614
|
// frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
|
615
|
String s = (String)JOptionPane.showInputDialog(
|
616
|
null,
|
617
|
scrollPane,
|
618
|
"Get full reference or name",
|
619
|
JOptionPane.PLAIN_MESSAGE,
|
620
|
null,
|
621
|
null,
|
622
|
name);
|
623
|
return s;
|
624
|
}
|
625
|
|
626
|
|
627
|
|
628
|
/**
|
629
|
* @param name
|
630
|
* @return
|
631
|
* @throws TransformerException
|
632
|
* @throws TransformerFactoryConfigurationError
|
633
|
*/
|
634
|
protected String askWhichScientificName(String fullname,String atomised,String classificationName, Node fullParagraph) throws TransformerFactoryConfigurationError, TransformerException {
|
635
|
// logger.info("getScientificName for "+ fullname);
|
636
|
// JFrame frame = new JFrame("I have a question");
|
637
|
// frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
|
638
|
String k = fullname+"_"+atomised;
|
639
|
|
640
|
String defaultN = "";
|
641
|
if (atomised.length()>fullname.length()) {
|
642
|
defaultN=atomised;
|
643
|
} else {
|
644
|
defaultN=fullname;
|
645
|
}
|
646
|
|
647
|
if (namesAsked.containsKey(k)){
|
648
|
return namesAsked.get(k);
|
649
|
}
|
650
|
else{
|
651
|
//activate it for ants because a lot of markup is incomplete
|
652
|
if (classificationName.indexOf("Ants")>-1) {
|
653
|
return defaultN;
|
654
|
}
|
655
|
|
656
|
JTextArea textArea = new JTextArea("The names in the free text and in the xml tags do not match : "+fullname+
|
657
|
", or "+atomised+"\n"+formatNode(fullParagraph));
|
658
|
JScrollPane scrollPane = new JScrollPane(textArea);
|
659
|
textArea.setLineWrap(true);
|
660
|
textArea.setWrapStyleWord(true);
|
661
|
scrollPane.setPreferredSize( new Dimension( 700, 200 ) );
|
662
|
String s = (String)JOptionPane.showInputDialog(
|
663
|
null,
|
664
|
scrollPane,
|
665
|
"Which name do I have to use? The current classification is "+classificationName,
|
666
|
JOptionPane.PLAIN_MESSAGE,
|
667
|
null,
|
668
|
null,
|
669
|
defaultN);
|
670
|
namesAsked.put(k, s);
|
671
|
return s;
|
672
|
}
|
673
|
}
|
674
|
|
675
|
|
676
|
protected int askAddParent(String s){
|
677
|
// boolean hack=true;
|
678
|
// if (hack) {
|
679
|
// return 1;
|
680
|
// }
|
681
|
JTextArea textArea = new JTextArea("If you want to add a parent taxa for "+s+", click \"Yes\"." +
|
682
|
" If it is a root for this classification, click \"No\" or \"Cancel\".");
|
683
|
JScrollPane scrollPane = new JScrollPane(textArea);
|
684
|
textArea.setLineWrap(true);
|
685
|
textArea.setWrapStyleWord(true);
|
686
|
scrollPane.setPreferredSize( new Dimension( 600, 70 ) );
|
687
|
|
688
|
Object[] options = { UIManager.getString("OptionPane.yesButtonText"),
|
689
|
UIManager.getString("OptionPane.noButtonText")};
|
690
|
|
691
|
|
692
|
int addTaxon = JOptionPane.showOptionDialog(null,
|
693
|
scrollPane,
|
694
|
"",
|
695
|
JOptionPane.YES_NO_OPTION,
|
696
|
0,
|
697
|
null,
|
698
|
options,
|
699
|
options[1]);
|
700
|
return addTaxon;
|
701
|
}
|
702
|
|
703
|
protected String askSetParent(String s){
|
704
|
JTextArea textArea = new JTextArea("What is the first taxon parent for "+s+"?\n"+
|
705
|
"The rank will be asked later. ");
|
706
|
JScrollPane scrollPane = new JScrollPane(textArea);
|
707
|
textArea.setLineWrap(true);
|
708
|
textArea.setWrapStyleWord(true);
|
709
|
scrollPane.setPreferredSize( new Dimension( 700, 200 ) );
|
710
|
|
711
|
String s2 = (String)JOptionPane.showInputDialog(
|
712
|
null,
|
713
|
scrollPane,
|
714
|
"",
|
715
|
JOptionPane.PLAIN_MESSAGE,
|
716
|
null,
|
717
|
null,
|
718
|
s);
|
719
|
return s2;
|
720
|
}
|
721
|
|
722
|
protected String askRank(String s, List<String> rankListStr){
|
723
|
JTextArea textArea = new JTextArea("What is the rank for "+s+"?");
|
724
|
JScrollPane scrollPane = new JScrollPane(textArea);
|
725
|
textArea.setLineWrap(true);
|
726
|
textArea.setWrapStyleWord(true);
|
727
|
scrollPane.setPreferredSize( new Dimension( 700, 200 ) );
|
728
|
|
729
|
String r = (String)JOptionPane.showInputDialog(
|
730
|
null,
|
731
|
scrollPane,
|
732
|
"",
|
733
|
JOptionPane.PLAIN_MESSAGE,
|
734
|
null,
|
735
|
rankListStr.toArray(),
|
736
|
null);
|
737
|
return r;
|
738
|
}
|
739
|
|
740
|
/**
|
741
|
* @param name
|
742
|
* @return
|
743
|
* @throws TransformerException
|
744
|
* @throws TransformerFactoryConfigurationError
|
745
|
*/
|
746
|
protected String askFeatureName(String paragraph){
|
747
|
// logger.info("getScientificName for "+ fullname);
|
748
|
// JFrame frame = new JFrame("I have a question");
|
749
|
// frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
|
750
|
JTextArea textArea = new JTextArea("How should the feature be named? \n"+paragraph);
|
751
|
JScrollPane scrollPane = new JScrollPane(textArea);
|
752
|
textArea.setLineWrap(true);
|
753
|
textArea.setWrapStyleWord(true);
|
754
|
scrollPane.setPreferredSize( new Dimension( 700, 200 ) );
|
755
|
String s = (String)JOptionPane.showInputDialog(
|
756
|
null,
|
757
|
scrollPane,
|
758
|
"",
|
759
|
JOptionPane.PLAIN_MESSAGE,
|
760
|
null,
|
761
|
null,
|
762
|
"Other");
|
763
|
return s;
|
764
|
}
|
765
|
|
766
|
/**
|
767
|
* @param taxonname2
|
768
|
* @param bestMatchingTaxon
|
769
|
* @param refMods
|
770
|
* @param similarityAuthor
|
771
|
* @return
|
772
|
*/
|
773
|
protected boolean askIfReuseBestMatchingTaxon(INonViralName taxonname2, Taxon bestMatchingTaxon, Reference refMods, double similarityScore, double similarityAuthor) {
|
774
|
Object[] options = { UIManager.getString("OptionPane.yesButtonText"),
|
775
|
UIManager.getString("OptionPane.noButtonText")};
|
776
|
|
777
|
if (similarityScore<0.66 && similarityAuthor<0.5) {
|
778
|
return false;
|
779
|
// System.out.println("should say NO");
|
780
|
}
|
781
|
|
782
|
boolean sameSource=false;
|
783
|
boolean noRef=false;
|
784
|
|
785
|
String sec = refMods.getTitleCache();
|
786
|
String secBest = "";
|
787
|
try{
|
788
|
secBest=bestMatchingTaxon.getSec().getTitleCache();
|
789
|
}
|
790
|
catch(NullPointerException e){
|
791
|
logger.warn("no sec - ignore");
|
792
|
}
|
793
|
|
794
|
if (secBest.isEmpty()) {
|
795
|
noRef=true;
|
796
|
}
|
797
|
|
798
|
Object defaultOption=options[1];
|
799
|
if(sec.equalsIgnoreCase(secBest)
|
800
|
// || taxonname2.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(bestMatchingTaxon.getTitleCache().split("sec.")[0].trim())
|
801
|
) {
|
802
|
//System.out.println(sec+" and "+secBest);
|
803
|
sameSource=true;
|
804
|
//-1 <=> no author
|
805
|
if (similarityScore>0.65 && (similarityAuthor==-1 || similarityAuthor>0.8)) {
|
806
|
defaultOption=options[0];
|
807
|
} else {
|
808
|
defaultOption=options[1];
|
809
|
}
|
810
|
} else {
|
811
|
if (similarityScore>0.65 && similarityAuthor>0.8) {
|
812
|
if(similarityScore==1 ) {
|
813
|
return true;
|
814
|
}
|
815
|
defaultOption=options[0];
|
816
|
} else {
|
817
|
defaultOption=options[1];
|
818
|
}
|
819
|
}
|
820
|
|
821
|
String sourcesStr="";
|
822
|
|
823
|
Set<IdentifiableSource> sources = bestMatchingTaxon.getSources();
|
824
|
for (IdentifiableSource src:sources){
|
825
|
try{
|
826
|
String srcSec=src.getCitation().getTitleCache();
|
827
|
if(!srcSec.isEmpty()){
|
828
|
sourcesStr+="\n "+srcSec;
|
829
|
if (srcSec.equalsIgnoreCase(sec)){
|
830
|
sameSource=true;
|
831
|
if (similarityScore>0.65 && similarityAuthor>0.8) {
|
832
|
defaultOption=options[0];
|
833
|
} else {
|
834
|
defaultOption=options[1];
|
835
|
}
|
836
|
}
|
837
|
}
|
838
|
}catch(Exception e){
|
839
|
logger.warn("the source reference is maybe null, just ignore it.");
|
840
|
}
|
841
|
}
|
842
|
|
843
|
if (sameSource && similarityScore>0.9999 && (similarityAuthor==-1 || similarityAuthor>0.8)) {
|
844
|
return true;
|
845
|
}
|
846
|
if(similarityScore<0.66) {
|
847
|
defaultOption=options[1];
|
848
|
}
|
849
|
|
850
|
// //only activate it if you know the data you are importing (ok for Chenopodium)
|
851
|
if(defaultOption==options[1]) {
|
852
|
return false;
|
853
|
}
|
854
|
|
855
|
JTextArea textArea =null;
|
856
|
if (!sourcesStr.isEmpty()) {
|
857
|
textArea = new JTextArea("Does "+taxonname2.toString()+" correspond to "
|
858
|
+ bestMatchingTaxon.toString()+" ?\n Click \"Yes\". if it does, click \"No\" if it does not."
|
859
|
+ "\n The current sources are:"+ sourcesStr);
|
860
|
} else {
|
861
|
textArea = new JTextArea("Does "+taxonname2.toString()+" correspond to "
|
862
|
+ bestMatchingTaxon.toString()+" ?\n Click \"Yes\". if it does, click \"No\" if it does not.");
|
863
|
}
|
864
|
JScrollPane scrollPane = new JScrollPane(textArea);
|
865
|
textArea.setLineWrap(true);
|
866
|
textArea.setWrapStyleWord(true);
|
867
|
scrollPane.setPreferredSize( new Dimension( 600, 70 ) );
|
868
|
|
869
|
int addTaxon = JOptionPane.showOptionDialog(null,
|
870
|
scrollPane,
|
871
|
refMods.toString(),
|
872
|
JOptionPane.YES_NO_OPTION,
|
873
|
0,
|
874
|
null,
|
875
|
options,
|
876
|
defaultOption);
|
877
|
if(addTaxon==1) {
|
878
|
return false;
|
879
|
} else {
|
880
|
return true;
|
881
|
}
|
882
|
}
|
883
|
|
884
|
/**
|
885
|
* @param fullLineRefName
|
886
|
* @return
|
887
|
*/
|
888
|
protected int askIfNameContained(String fullLineRefName) {
|
889
|
|
890
|
JTextArea textArea = new JTextArea("Is a scientific name contained in this sentence ? Type 0 if contains a name, 1 if it's only a reference. Press 2 if it's to be ignored \n"+fullLineRefName);
|
891
|
JScrollPane scrollPane = new JScrollPane(textArea);
|
892
|
textArea.setLineWrap(true);
|
893
|
textArea.setWrapStyleWord(true);
|
894
|
scrollPane.setPreferredSize( new Dimension( 600, 400 ) );
|
895
|
|
896
|
String s = (String)JOptionPane.showInputDialog(
|
897
|
null,
|
898
|
scrollPane,
|
899
|
"",
|
900
|
JOptionPane.PLAIN_MESSAGE,
|
901
|
null,
|
902
|
null,
|
903
|
"0");
|
904
|
return Integer.valueOf(s);
|
905
|
}
|
906
|
|
907
|
|
908
|
/**
|
909
|
* @param name
|
910
|
* @return
|
911
|
*/
|
912
|
protected Rank askForRank(String fullname,Rank rank, NomenclaturalCode nomenclaturalCode) {
|
913
|
// logger.info("askForRank for "+ fullname+ ", "+rank);
|
914
|
// JFrame frame = new JFrame("I have a question");
|
915
|
// frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
|
916
|
|
917
|
if (ranksAsked.containsKey(fullname)){
|
918
|
return ranksAsked.get(fullname);
|
919
|
}
|
920
|
else{
|
921
|
boolean np=false;
|
922
|
int npi=0;
|
923
|
Rank cR = null;
|
924
|
|
925
|
while (!np && npi<2)
|
926
|
{
|
927
|
|
928
|
|
929
|
JTextArea textArea = new JTextArea("What is the correct rank for "+fullname+"?");
|
930
|
JScrollPane scrollPane = new JScrollPane(textArea);
|
931
|
textArea.setLineWrap(true);
|
932
|
textArea.setWrapStyleWord(true);
|
933
|
scrollPane.setPreferredSize( new Dimension( 600, 50 ) );
|
934
|
|
935
|
List<Rank> rankList = new ArrayList<Rank>();
|
936
|
rankList = importer.getTermService().list(Rank.class, null, null, null, null);
|
937
|
|
938
|
List<String> rankListStr = new ArrayList<String>();
|
939
|
for (Rank r:rankList) {
|
940
|
rankListStr.add(r.toString());
|
941
|
}
|
942
|
String s = (String)JOptionPane.showInputDialog(
|
943
|
null,
|
944
|
scrollPane,
|
945
|
"The rank extracted from the TaxonX file is "+rank.toString(),
|
946
|
JOptionPane.PLAIN_MESSAGE,
|
947
|
null,
|
948
|
rankListStr.toArray(),
|
949
|
rank.toString());
|
950
|
|
951
|
|
952
|
try {
|
953
|
npi++;
|
954
|
cR = Rank.getRankByEnglishName(s,nomenclaturalCode,true);
|
955
|
np=true;
|
956
|
} catch (UnknownCdmTypeException e) {
|
957
|
logger.warn("Unknown rank ?!"+s);
|
958
|
logger.warn(e);
|
959
|
}
|
960
|
}
|
961
|
ranksAsked.put(fullname,cR);
|
962
|
return cR;
|
963
|
|
964
|
}
|
965
|
}
|
966
|
|
967
|
/**
|
968
|
* ask user to specify what kind of paragraph the current "multiple" section is
|
969
|
* default possibilities are "synonyms","material examined","distribution","image caption","other"
|
970
|
* could make sense to replace this list with the CDM-Feature list
|
971
|
* if "other" is selected, a second pop-up will be prompted to ask user to specify a new Feature name.
|
972
|
* @param fullParagraph : the current Node
|
973
|
* @return the section name
|
974
|
* */
|
975
|
protected String askMultiple(Node fullParagraph){
|
976
|
String fp = "";
|
977
|
try {
|
978
|
fp = formatNode(fullParagraph);
|
979
|
} catch (TransformerFactoryConfigurationError e1) {
|
980
|
// TODO Auto-generated catch block
|
981
|
e1.printStackTrace();
|
982
|
} catch (TransformerException e1) {
|
983
|
// TODO Auto-generated catch block
|
984
|
e1.printStackTrace();
|
985
|
}
|
986
|
JTextArea textArea = new JTextArea("What category is it for this paragraph \n"+fp);
|
987
|
JScrollPane scrollPane = new JScrollPane(textArea);
|
988
|
textArea.setLineWrap(true);
|
989
|
textArea.setWrapStyleWord(true);
|
990
|
scrollPane.setPreferredSize( new Dimension( 600, 400 ) );
|
991
|
|
992
|
String[] possiblities = {"synonyms","material examined","distribution","image caption","Other","vernacular name","type status","new category"};
|
993
|
|
994
|
|
995
|
String s = (String)JOptionPane.showInputDialog(
|
996
|
null,
|
997
|
scrollPane,
|
998
|
"",
|
999
|
JOptionPane.PLAIN_MESSAGE,
|
1000
|
null,
|
1001
|
possiblities,
|
1002
|
"Other");
|
1003
|
|
1004
|
if (s.equalsIgnoreCase("new category")) {
|
1005
|
try {
|
1006
|
s=askFeatureName(formatNode(fullParagraph));
|
1007
|
} catch (TransformerFactoryConfigurationError e) {
|
1008
|
logger.warn(e);
|
1009
|
} catch (TransformerException e) {
|
1010
|
logger.warn(e);
|
1011
|
}
|
1012
|
}
|
1013
|
return s;
|
1014
|
|
1015
|
}
|
1016
|
|
1017
|
|
1018
|
|
1019
|
/**
|
1020
|
* asks for the hierarchical parent, based on the current classification
|
1021
|
* @param taxon
|
1022
|
* @param classification
|
1023
|
* @return Taxon, the parent Taxon
|
1024
|
*/
|
1025
|
protected Taxon askParent(Taxon taxon,Classification classification ) {
|
1026
|
// System.out.println("ASK PARENT "+classification);
|
1027
|
// logger.info("ask Parent "+taxon.getTitleCache());
|
1028
|
Set<TaxonNode> allNodes = classification.getAllNodes();
|
1029
|
Map<String,Taxon> nodesMap = new HashMap<String, Taxon>();
|
1030
|
|
1031
|
for (TaxonNode tn:allNodes){
|
1032
|
Taxon t = tn.getTaxon();
|
1033
|
nodesMap.put(t.getTitleCache(), t);
|
1034
|
}
|
1035
|
List<String> nodeList = new ArrayList<String>();
|
1036
|
for (String nl : nodesMap.keySet()) {
|
1037
|
nodeList.add(nl+" - "+nodesMap.get(nl).getName().getRank());
|
1038
|
}
|
1039
|
Collections.sort(nodeList);
|
1040
|
nodeList.add(0, "Not here!");
|
1041
|
|
1042
|
JFrame frame = new JFrame("I have a question");
|
1043
|
frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
|
1044
|
String s = (String)JOptionPane.showInputDialog(
|
1045
|
frame,
|
1046
|
"What is the taxon parent for "+taxon.getTitleCache()+"?",
|
1047
|
"The current classification is "+classification.getTitleCache(),
|
1048
|
JOptionPane.PLAIN_MESSAGE,
|
1049
|
null,
|
1050
|
nodeList.toArray(),
|
1051
|
"Not here!");
|
1052
|
|
1053
|
Taxon returnTaxon = nodesMap.get(s.split(" - ")[0]);
|
1054
|
// logger.info("ask Parent returns "+s);
|
1055
|
return returnTaxon;
|
1056
|
}
|
1057
|
|
1058
|
|
1059
|
/**
|
1060
|
*
|
1061
|
* @param r: the rank as string (with dwc tags)
|
1062
|
* @return Rank : the Rank object corresponding to the current string
|
1063
|
*
|
1064
|
*/
|
1065
|
protected Rank getRank(String r){
|
1066
|
if (r==null) {
|
1067
|
r=Rank.UNKNOWN_RANK().toString();
|
1068
|
}
|
1069
|
r=r.replace("dwcranks:", "");
|
1070
|
r =r.replace("dwc:","");
|
1071
|
|
1072
|
Rank rank = Rank.UNKNOWN_RANK();
|
1073
|
if (r.equalsIgnoreCase("Superfamily")) {
|
1074
|
rank=Rank.SUPERFAMILY();
|
1075
|
}
|
1076
|
else if (r.equalsIgnoreCase("Family")) {
|
1077
|
rank=Rank.FAMILY();
|
1078
|
}
|
1079
|
else if (r.equalsIgnoreCase("Subfamily")) {
|
1080
|
rank=Rank.SUBFAMILY();
|
1081
|
}
|
1082
|
else if (r.equalsIgnoreCase("Tribe")) {
|
1083
|
rank=Rank.TRIBE();
|
1084
|
}
|
1085
|
else if (r.equalsIgnoreCase("Subtribe")) {
|
1086
|
rank=Rank.SUBTRIBE();
|
1087
|
}
|
1088
|
else if (r.equalsIgnoreCase("Genus")) {
|
1089
|
rank=Rank.GENUS();
|
1090
|
}
|
1091
|
else if (r.equalsIgnoreCase("Subgenus")) {
|
1092
|
rank=Rank.SUBGENUS();
|
1093
|
}
|
1094
|
else if (r.equalsIgnoreCase("Section")) {
|
1095
|
rank=Rank.SECTION_BOTANY();
|
1096
|
}
|
1097
|
else if (r.equalsIgnoreCase("Subsection")) {
|
1098
|
rank=Rank.SUBSECTION_BOTANY();
|
1099
|
}
|
1100
|
else if (r.equalsIgnoreCase("Series")) {
|
1101
|
rank=Rank.SERIES();
|
1102
|
}
|
1103
|
else if (r.equalsIgnoreCase("Subseries")) {
|
1104
|
rank=Rank.SUBSERIES();
|
1105
|
}
|
1106
|
else if (r.equalsIgnoreCase("Species")) {
|
1107
|
rank=Rank.SPECIES();
|
1108
|
}
|
1109
|
else if (r.equalsIgnoreCase("Subspecies")) {
|
1110
|
rank=Rank.SUBSPECIES();
|
1111
|
}
|
1112
|
else if (r.equalsIgnoreCase("Variety") || r.equalsIgnoreCase("varietyEpithet")) {
|
1113
|
rank=Rank.VARIETY();
|
1114
|
}
|
1115
|
else if (r.equalsIgnoreCase("Subvariety")) {
|
1116
|
rank=Rank.SUBVARIETY();
|
1117
|
}
|
1118
|
else if (r.equalsIgnoreCase("Form")) {
|
1119
|
rank=Rank.FORM();
|
1120
|
}
|
1121
|
else if (r.equalsIgnoreCase("Subform")) {
|
1122
|
rank=Rank.SUBFORM();
|
1123
|
}else if (r.equalsIgnoreCase("higher")) {
|
1124
|
// rank=Rank.SUPRAGENERICTAXON();
|
1125
|
logger.warn("handling of 'higher' rank still unclear");
|
1126
|
}
|
1127
|
|
1128
|
return rank;
|
1129
|
}
|
1130
|
|
1131
|
|
1132
|
/**
|
1133
|
* @param ato: atomised taxon name data
|
1134
|
* @return rank present in the xmldata fields
|
1135
|
*/
|
1136
|
protected Rank getRank(Map<String, String> ato) {
|
1137
|
Rank rank=Rank.UNKNOWN_RANK();
|
1138
|
|
1139
|
if (ato == null) {
|
1140
|
return rank;
|
1141
|
}
|
1142
|
if (ato.containsKey("dwc:family")){
|
1143
|
rank=Rank.FAMILY();
|
1144
|
}
|
1145
|
if (ato.containsKey("dwc:tribe") || ato.containsKey("dwcranks:tribe")){
|
1146
|
rank=Rank.TRIBE();
|
1147
|
}
|
1148
|
if (ato.containsKey("dwc:genus")) {
|
1149
|
rank= Rank.GENUS();
|
1150
|
}
|
1151
|
if (ato.containsKey("dwc:subgenus")) {
|
1152
|
rank= Rank.SUBGENUS();
|
1153
|
}
|
1154
|
if (ato.containsKey("dwc:specificepithet") || ato.containsKey("dwc:species")) {
|
1155
|
rank= Rank.SPECIES();
|
1156
|
}
|
1157
|
if (ato.containsKey("dwc:infraspecificepithet")) {
|
1158
|
rank= Rank.INFRASPECIES();
|
1159
|
}
|
1160
|
if (ato.containsKey("dwcranks:varietyepithet")) {
|
1161
|
rank=Rank.VARIETY();
|
1162
|
}
|
1163
|
//popUp(rank.getTitleCache());
|
1164
|
return rank;
|
1165
|
}
|
1166
|
|
1167
|
/**
|
1168
|
* Format a XML node for a clean (screen) output with tags
|
1169
|
* @param Node : the node to format
|
1170
|
* @return String : the XML section formated for a screen output
|
1171
|
* */
|
1172
|
|
1173
|
protected String formatNode(Node node) throws TransformerFactoryConfigurationError, TransformerException{
|
1174
|
Transformer transformer = TransformerFactory.newInstance().newTransformer();
|
1175
|
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
|
1176
|
//initialize StreamResult with File object to save to file
|
1177
|
StreamResult result = new StreamResult(new StringWriter());
|
1178
|
DOMSource source = new DOMSource(node);
|
1179
|
transformer.transform(source, result);
|
1180
|
String xmlString = result.getWriter().toString();
|
1181
|
return xmlString;
|
1182
|
}
|
1183
|
|
1184
|
protected boolean containsDistinctLetters(String word){
|
1185
|
Set<Character> dl = new HashSet<Character>();
|
1186
|
for (char a: word.toCharArray()) {
|
1187
|
dl.add(a);
|
1188
|
}
|
1189
|
if(dl.size()>1 && word.indexOf("no description text")==-1) {
|
1190
|
return true;
|
1191
|
} else {
|
1192
|
return false;
|
1193
|
}
|
1194
|
}
|
1195
|
|
1196
|
/**
|
1197
|
* Tries to match the status string against any new name status
|
1198
|
* and returns the status if it matches. Returns <code>null</code> otherwise.
|
1199
|
* @param status
|
1200
|
* @return
|
1201
|
*/
|
1202
|
protected String newNameStatus(String status){
|
1203
|
String pattern = "(" + "((sp|spec|gen|comb|)\\.\\s*nov.)" +
|
1204
|
"|(new\\s*(species|combination))" +
|
1205
|
"|(n\\.\\s*sp\\.)" +
|
1206
|
"|(sp\\.\\s*n\\.)" +
|
1207
|
")";
|
1208
|
if (status.trim().matches(pattern)){
|
1209
|
//FIXME
|
1210
|
return null;
|
1211
|
// return status;
|
1212
|
}else{
|
1213
|
return null;
|
1214
|
}
|
1215
|
}
|
1216
|
|
1217
|
|
1218
|
/** Creates an cdm-NomenclaturalCode by the tcs NomenclaturalCode
|
1219
|
*/
|
1220
|
protected NomenclaturalStatusType nomStatusString2NomStatus (String nomStatus) throws UnknownCdmTypeException{
|
1221
|
|
1222
|
if (nomStatus == null){ return null;
|
1223
|
}else if ("Valid".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.VALID();
|
1224
|
|
1225
|
}else if ("Alternative".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.ALTERNATIVE();
|
1226
|
}else if ("nom. altern.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.ALTERNATIVE();
|
1227
|
|
1228
|
}else if ("Ambiguous".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.AMBIGUOUS();
|
1229
|
|
1230
|
}else if ("Doubtful".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.DOUBTFUL();
|
1231
|
|
1232
|
}else if ("Confusum".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.CONFUSUM();
|
1233
|
|
1234
|
}else if ("Illegitimate".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.ILLEGITIMATE();
|
1235
|
}else if ("nom. illeg.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.ILLEGITIMATE();
|
1236
|
|
1237
|
}else if ("Superfluous".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.SUPERFLUOUS();
|
1238
|
}else if ("nom. superfl.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.SUPERFLUOUS();
|
1239
|
|
1240
|
}else if ("Rejected".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.REJECTED();
|
1241
|
}else if ("nom. rej.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.REJECTED();
|
1242
|
|
1243
|
}else if ("Utique Rejected".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.UTIQUE_REJECTED();
|
1244
|
|
1245
|
}else if ("Conserved Prop".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.CONSERVED_PROP();
|
1246
|
|
1247
|
}else if ("Orthography Conserved Prop".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.ORTHOGRAPHY_CONSERVED_PROP();
|
1248
|
|
1249
|
}else if ("Legitimate".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.LEGITIMATE();
|
1250
|
|
1251
|
}else if ("Novum".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.NOVUM();
|
1252
|
}else if ("nom. nov.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.NOVUM();
|
1253
|
|
1254
|
}else if ("Utique Rejected Prop".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.UTIQUE_REJECTED_PROP();
|
1255
|
|
1256
|
}else if ("Orthography Conserved".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.ORTHOGRAPHY_CONSERVED();
|
1257
|
|
1258
|
}else if ("Rejected Prop".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.REJECTED_PROP();
|
1259
|
|
1260
|
}else if ("Conserved".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.CONSERVED();
|
1261
|
}else if ("nom. cons.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.CONSERVED();
|
1262
|
|
1263
|
}else if ("Sanctioned".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.SANCTIONED();
|
1264
|
|
1265
|
}else if ("Invalid".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.INVALID();
|
1266
|
}else if ("nom. inval.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.INVALID();
|
1267
|
|
1268
|
}else if ("Nudum".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.NUDUM();
|
1269
|
}else if ("nom. nud.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.NUDUM();
|
1270
|
|
1271
|
}else if ("Combination Invalid".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.COMBINATION_INVALID();
|
1272
|
|
1273
|
}else if ("Provisional".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.PROVISIONAL();
|
1274
|
}else if ("nom. provis.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.PROVISIONAL();
|
1275
|
}
|
1276
|
else {
|
1277
|
throw new UnknownCdmTypeException("Unknown Nomenclatural status type " + nomStatus);
|
1278
|
}
|
1279
|
}
|
1280
|
|
1281
|
|
1282
|
//TypeDesignation
|
1283
|
protected SpecimenTypeDesignationStatus typeStatusId2TypeStatus (int typeStatusId) throws UnknownCdmTypeException{
|
1284
|
switch (typeStatusId){
|
1285
|
case 0: return null;
|
1286
|
case 1: return SpecimenTypeDesignationStatus.HOLOTYPE();
|
1287
|
case 2: return SpecimenTypeDesignationStatus.LECTOTYPE();
|
1288
|
case 3: return SpecimenTypeDesignationStatus.NEOTYPE();
|
1289
|
case 4: return SpecimenTypeDesignationStatus.EPITYPE();
|
1290
|
case 5: return SpecimenTypeDesignationStatus.ISOLECTOTYPE();
|
1291
|
case 6: return SpecimenTypeDesignationStatus.ISONEOTYPE();
|
1292
|
case 7: return SpecimenTypeDesignationStatus.ISOTYPE();
|
1293
|
case 8: return SpecimenTypeDesignationStatus.PARANEOTYPE();
|
1294
|
case 9: return SpecimenTypeDesignationStatus.PARATYPE();
|
1295
|
case 10: return SpecimenTypeDesignationStatus.SECOND_STEP_LECTOTYPE();
|
1296
|
case 11: return SpecimenTypeDesignationStatus.SECOND_STEP_NEOTYPE();
|
1297
|
case 12: return SpecimenTypeDesignationStatus.SYNTYPE();
|
1298
|
case 21: return SpecimenTypeDesignationStatus.ICONOTYPE();
|
1299
|
case 22: return SpecimenTypeDesignationStatus.PHOTOTYPE();
|
1300
|
default: {
|
1301
|
throw new UnknownCdmTypeException("Unknown TypeDesignationStatus (id=" + Integer.valueOf(typeStatusId).toString() + ")");
|
1302
|
}
|
1303
|
}
|
1304
|
}
|
1305
|
|
1306
|
|
1307
|
}
|
1308
|
|
1309
|
|