3 * Copyright (C) 2013 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.taxonx2013
;
12 import java
.awt
.Dimension
;
13 import java
.io
.StringWriter
;
14 import java
.util
.ArrayList
;
15 import java
.util
.Collections
;
16 import java
.util
.HashMap
;
17 import java
.util
.HashSet
;
18 import java
.util
.List
;
20 import java
.util
.Scanner
;
22 import java
.util
.UUID
;
24 import javax
.swing
.JFrame
;
25 import javax
.swing
.JOptionPane
;
26 import javax
.swing
.JScrollPane
;
27 import javax
.swing
.JTextArea
;
28 import javax
.swing
.UIManager
;
29 import javax
.xml
.transform
.OutputKeys
;
30 import javax
.xml
.transform
.Transformer
;
31 import javax
.xml
.transform
.TransformerException
;
32 import javax
.xml
.transform
.TransformerFactory
;
33 import javax
.xml
.transform
.TransformerFactoryConfigurationError
;
34 import javax
.xml
.transform
.dom
.DOMSource
;
35 import javax
.xml
.transform
.stream
.StreamResult
;
37 import org
.apache
.commons
.lang
.StringUtils
;
38 import org
.apache
.log4j
.Logger
;
39 import org
.w3c
.dom
.Node
;
40 import org
.w3c
.dom
.NodeList
;
42 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacade
;
43 import eu
.etaxonomy
.cdm
.api
.service
.IAgentService
;
44 import eu
.etaxonomy
.cdm
.io
.specimen
.UnitsGatheringArea
;
45 import eu
.etaxonomy
.cdm
.io
.specimen
.UnitsGatheringEvent
;
46 import eu
.etaxonomy
.cdm
.model
.agent
.AgentBase
;
47 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
48 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
49 import eu
.etaxonomy
.cdm
.model
.common
.DefinedTermBase
;
50 import eu
.etaxonomy
.cdm
.model
.common
.IdentifiableSource
;
51 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
52 import eu
.etaxonomy
.cdm
.model
.common
.TimePeriod
;
53 import eu
.etaxonomy
.cdm
.model
.common
.UuidAndTitleCache
;
54 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
55 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
56 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
57 import eu
.etaxonomy
.cdm
.model
.location
.NamedArea
;
58 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
59 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatusType
;
60 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
61 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
62 import eu
.etaxonomy
.cdm
.model
.name
.SpecimenTypeDesignation
;
63 import eu
.etaxonomy
.cdm
.model
.name
.SpecimenTypeDesignationStatus
;
64 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
65 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivedUnit
;
66 import eu
.etaxonomy
.cdm
.model
.occurrence
.GatheringEvent
;
67 import eu
.etaxonomy
.cdm
.model
.occurrence
.SpecimenOrObservationBase
;
68 import eu
.etaxonomy
.cdm
.model
.occurrence
.SpecimenOrObservationType
;
69 import eu
.etaxonomy
.cdm
.model
.reference
.IBook
;
70 import eu
.etaxonomy
.cdm
.model
.reference
.IBookSection
;
71 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
72 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
73 import eu
.etaxonomy
.cdm
.model
.taxon
.Classification
;
74 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
75 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonNode
;
76 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
77 import eu
.etaxonomy
.cdm
.strategy
.parser
.ParserProblem
;
// Base helper for the TaxonX import: holds the importer/state references, two answer caches and the
// logger that the extraction and dialog methods below rely on.
// NOTE(review): Map (and Set, further down) are used here, but no import for java.util.Map / java.util.Set
// is visible in the import list above - confirm against the complete file.
85 public class TaxonXExtractor
{
87 protected TaxonXImport importer
;
88 protected TaxonXImportState state2
;
// Answers already given in askWhichScientificName, keyed by fullname + "_" + atomised.
89 private final Map
<String
,String
> namesAsked
= new HashMap
<String
, String
>();
// Ranks already resolved in askForRank, keyed by the full name.
90 private final Map
<String
,Rank
>ranksAsked
= new HashMap
<String
, Rank
>();
// Instance-level (non-static, package-visible) log4j logger shared by all methods of this class.
92 Logger logger
= Logger
.getLogger(TaxonXExtractor
.class);
// Inner helper that turns a bibliographic reference string into a generic Reference and attaches it
// to an accepted taxon (as nomenclatural reference and as a Feature.CITATION text element).
// Non-static inner class: it uses the enclosing instance's 'importer' field.
94 public class ReferenceBuilder
{
// Set to true as soon as builReference has been called once.
96 private boolean foundBibref
=false;
// Used to register the source reference (refMods) on every object created or touched here.
97 private final TaxonXAddSources sourceHandler
;
100 * @param sourceHandler
102 public ReferenceBuilder(TaxonXAddSources sourceHandler
) {
103 this.sourceHandler
=sourceHandler
;
107 * @return the foundBibref
109 public boolean isFoundBibref() {
114 * @param foundBibref the foundBibref to set
116 public void setFoundBibref(boolean foundBibref
) {
117 this.foundBibref
= foundBibref
;
// Cleans up the raw reference text, wraps it in a generic Reference and stores it on acceptedTaxon.
//   mref              - raw reference text
//   treatmentMainName - name of the treated taxon; stripped from the start of the reference text
//   nomenclaturalCode - not used in the lines visible here
//   acceptedTaxon     - taxon whose name/description receive the reference; saved at the end
//   refMods           - source reference passed to sourceHandler.addSource(...)
// NOTE(review): the embedded line numbers jump (128 -> 131, 144 -> 146 -> 148, ...), so some statements
// of this method - including the declaration of 'ref', presumably initialised from mref - are not visible.
125 public void builReference(String mref
, String treatmentMainName
, NomenclaturalCode nomenclaturalCode
,
126 Taxon acceptedTaxon
, Reference
<?
> refMods
) {
127 // System.out.println("builReference "+mref);
128 this.setFoundBibref(true);
// A trailing ';' or ',' is replaced by '.'.
131 if ( (ref
.endsWith(";") ||ref
.endsWith(",") ) && ((ref
.length())>1)) {
132 ref
=ref
.substring(0, ref
.length()-1)+".";
// NOTE(review): String.replace removes EVERY occurrence of treatmentMainName, not only the leading one.
134 if (ref
.startsWith(treatmentMainName
) && !ref
.endsWith(treatmentMainName
)) {
135 ref
=ref
.replace(treatmentMainName
, "");
// NOTE(review): the loop condition only looks at leading punctuation, but the body deletes every '.'
// and ',' anywhere in ref (e.g. inside author initials or page ranges) - confirm this is intended.
137 while (ref
.startsWith(".") || ref
.startsWith(",")) {
138 ref
=ref
.replace(".","").replace(",","").trim();
142 // logger.info("Current reference :"+nbRef+", "+ref+", "+treatmentMainName+"--"+ref.indexOf(treatmentMainName));
143 Reference
<?
> reference
= ReferenceFactory
.newGeneric();
144 reference
.setTitleCache(ref
, true);
146 //only add the first one if there is no nomenclatural reference yet
148 if(acceptedTaxon
.getName().getNomenclaturalReference()==null){
149 acceptedTaxon
.getName().setNomenclaturalReference(reference
);
150 sourceHandler
.addSource(refMods
, acceptedTaxon
);
153 //add all other references as Feature.Citation
154 TaxonDescription taxonDescription
=importer
.getTaxonDescription(acceptedTaxon
, false, true);
155 acceptedTaxon
.addDescription(taxonDescription
);
156 sourceHandler
.addSource(refMods
, acceptedTaxon
);
// The cleaned reference text is stored as a CITATION text element in the default language.
158 TextData textData
= TextData
.NewInstance(Feature
.CITATION());
159 Language language
= Language
.DEFAULT();
160 textData
.putText(language
, ref
);
161 sourceHandler
.addSource(reference
, textData
,acceptedTaxon
.getName(),refMods
);
162 taxonDescription
.addElement(textData
);
164 sourceHandler
.addSource(refMods
, taxonDescription
);
// Persist the taxon together with everything attached above.
166 importer
.getTaxonService().saveOrUpdate(acceptedTaxon
);
167 // logger.warn("BWAAHHHH: "+nameToBeFilled.getParsingProblems()+", "+ref);
// Small holder returned by extractSpecimenOrObservation: a DerivedUnit plus a description text.
// NOTE(review): the bodies of getDescr/setDescr and the field they use are not visible in this view.
174 public class MySpecimenOrObservation
{
// Stays null until setDerivedUnitBase is called.
176 DerivedUnit derivedUnitBase
=null;
178 public String
getDescr() {
181 public void setDescr(String descr
) {
184 public DerivedUnit
getDerivedUnitBase() {
185 return derivedUnitBase
;
187 public void setDerivedUnitBase(DerivedUnit derivedUnitBase
) {
188 this.derivedUnitBase
= derivedUnitBase
;
// Builds a MySpecimenOrObservation from a TaxonX specimen node.
// Walks the children of specimenObservationNode: children named "tax:xmldata" are scanned for Darwin Core
// fields (dwc:country, dwc:locality, ...); "#text" and "tax:p" children supply the free-text description.
// A DerivedUnitFacade is then obtained from getFacade(...), a gathering event and gathering area are
// attached to it, and the facade's inner derived unit is returned inside the wrapper.
//   specimenObservationNode - XML node describing the specimen
//   derivedUnitBase         - overwritten with the facade's inner derived unit before it is used below
//   defaultAssociation      - forwarded to getFacade
//   typifiableName          - receives the type designation when a type status was found; may be null
// NOTE(review): the embedded line numbers jump in many places; the declarations of 'country', 'type'
// and 'tp', several else/try/catch lines and closing braces are not visible in this view.
200 @SuppressWarnings({ "unused", "rawtypes" })
201 protected MySpecimenOrObservation
extractSpecimenOrObservation(Node specimenObservationNode
, DerivedUnit derivedUnitBase
,
202 SpecimenOrObservationType defaultAssociation
, TaxonNameBase
<?
,?
> typifiableName
) {
204 String locality
=null;
205 String stateprov
=null;
206 String collector
=null;
// NOTE(review): fieldNumber is never assigned in the visible lines, so the setFieldNumber call near the
// end of the method would be dead code unless an assignment exists on a line not shown.
207 String fieldNumber
=null;
208 Double latitude
=null,longitude
=null;
// NOTE(review): only 'year' gets an initialiser on this line; day and month must be assigned on lines
// not visible here before the isEmpty() checks below - confirm.
210 String day
,month
,year
="";
211 String descr
="not available";
214 NodeList eventContent
=null;
216 DerivedUnitFacade derivedUnitFacade
= null;
218 UnitsGatheringEvent unitsGatheringEvent
;
219 UnitsGatheringArea unitsGatheringArea
;
220 DefinedTermBase areaCountry
;
222 MySpecimenOrObservation specimenOrObservation
= new MySpecimenOrObservation();
224 NodeList xmldata
= specimenObservationNode
.getChildNodes();
225 for (int n
=0;n
<xmldata
.getLength();n
++){
226 eventContent
=xmldata
.item(n
).getChildNodes();
227 if (xmldata
.item(n
).getNodeName().equalsIgnoreCase("tax:xmldata")){
// Dispatch on the (case-insensitive) element name of each child of the tax:xmldata node.
240 for (int j
=0;j
<eventContent
.getLength();j
++){
241 if(eventContent
.item(j
).getNodeName().equalsIgnoreCase("dwc:country")){
242 country
=eventContent
.item(j
).getTextContent().trim();
244 else if(eventContent
.item(j
).getNodeName().equalsIgnoreCase("dwc:locality")){
245 locality
=eventContent
.item(j
).getTextContent().trim();
247 else if(eventContent
.item(j
).getNodeName().equalsIgnoreCase("dwc:stateprovince")){
248 stateprov
=eventContent
.item(j
).getTextContent().trim();
250 else if(eventContent
.item(j
).getNodeName().equalsIgnoreCase("dwc:collector")){
251 collector
=eventContent
.item(j
).getTextContent().trim();
253 else if(eventContent
.item(j
).getNodeName().equalsIgnoreCase("dwc:yearcollected")){
254 year
=eventContent
.item(j
).getTextContent().trim();
256 else if(eventContent
.item(j
).getNodeName().equalsIgnoreCase("dwc:monthcollected")){
257 month
=eventContent
.item(j
).getTextContent().trim();
259 else if(eventContent
.item(j
).getNodeName().equalsIgnoreCase("dwc:daycollected")){
260 day
=eventContent
.item(j
).getTextContent().trim();
// Coordinates: an unparsable value is logged and the coordinate stays null.
262 else if(eventContent
.item(j
).getNodeName().equalsIgnoreCase("dwc:decimallongitude")){
263 String tmp
= eventContent
.item(j
).getTextContent().trim();
264 try{longitude
=Double
.valueOf(tmp
);}catch(Exception e
){logger
.warn("longitude is not a number");}
266 else if(eventContent
.item(j
).getNodeName().equalsIgnoreCase("dwc:decimallatitude")){
267 String tmp
= eventContent
.item(j
).getTextContent().trim();
268 try{latitude
=Double
.valueOf(tmp
);}catch(Exception e
){logger
.warn("latitude is not a number");}
269 }else if(eventContent
.item(j
).getNodeName().equalsIgnoreCase("dwc:TypeStatus")){
270 type
= eventContent
.item(j
).getTextContent().trim();
// Blank "#text" children are matched explicitly here; the log statement below reports any other child.
271 }else if(eventContent
.item(j
).getNodeName().equalsIgnoreCase("#text") && StringUtils
.isBlank(eventContent
.item(j
).getTextContent())){
275 logger
.info("UNEXTRACTED FIELD FOR SPECIMEN "+eventContent
.item(j
).getNodeName()+", "+eventContent
.item(j
).getTextContent()) ;
// Build the collection date from whatever date parts were found.
// Integer.parseInt throws NumberFormatException for non-numeric text; the "Collection date error"
// log below suggests a surrounding try/catch whose lines are not visible here.
278 if (!day
.isEmpty() || !month
.isEmpty() || !year
.isEmpty()){
280 if (!year
.isEmpty()) {
281 tp
= TimePeriod
.NewInstance(Integer
.parseInt(year
));
282 if (!month
.isEmpty()) {
283 tp
.setStartMonth(Integer
.parseInt(month
));
284 if (!day
.isEmpty()) {
285 tp
.setStartDay(Integer
.parseInt(day
));
291 logger
.warn("Collection date error "+e
);
// Free text directly under the specimen node: strip ';' and keep it as description if it looks like real text.
295 if(xmldata
.item(n
).getNodeName().equalsIgnoreCase("#text")){
296 descr
=xmldata
.item(n
).getTextContent().replaceAll(";","").trim();
297 if (descr
.length()>1 && containsDistinctLetters(descr
)) {
298 specimenOrObservation
.setDescr(descr
);
// Same handling for tax:p paragraphs; a later match overwrites an earlier description.
302 if(xmldata
.item(n
).getNodeName().equalsIgnoreCase("tax:p")){
303 descr
=xmldata
.item(n
).getTextContent().replaceAll(";","").trim();
304 if (descr
.length()>1 && containsDistinctLetters(descr
)) {
305 specimenOrObservation
.setDescr(descr
);
310 // if(asso && descr.length()>1){
312 // logger.info("DESCR: "+descr);
// A type status was found: create the facade from the type text and register a type designation.
313 if (!type
.isEmpty()) {
314 if (!containsDistinctLetters(type
)) {
315 type
="no description text";
317 derivedUnitFacade
= getFacade(type
.replaceAll(";",""), defaultAssociation
);
318 SpecimenTypeDesignation designation
= SpecimenTypeDesignation
.NewInstance();
320 if (typifiableName
!= null){
321 typifiableName
.addTypeDesignation(designation
, true);
323 logger
.warn("No typifiable name available");
// NOTE(review): if type was replaced by "no description text" above, this lookup receives that
// placeholder rather than the original status text.
325 SpecimenTypeDesignationStatus stds
= getSpecimenTypeDesignationStatusByKey(type
);
// Re-load the status term through the term service by UUID.
// NOTE(review): stds.getUuid() needs stds to be non-null; any guard is on a line not visible here.
327 stds
= (SpecimenTypeDesignationStatus
) importer
.getTermService().find(stds
.getUuid());
330 designation
.setTypeStatus(stds
);
331 derivedUnitFacade
.innerDerivedUnit().addSpecimenTypeDesignation(designation
);
333 derivedUnitBase
= derivedUnitFacade
.innerDerivedUnit();
334 // System.out.println("derivedUnitBase: "+derivedUnitBase);
335 // designation.setTypeSpecimen(derivedUnitBase);
336 // TaxonNameBase<?,?> name = taxon.getName();
337 // name.addTypeDesignation(designation, true);
// No type status: the facade is created from the description text instead.
339 if (!containsDistinctLetters(descr
.replaceAll(";",""))) {
340 descr
="no description text";
343 derivedUnitFacade
= getFacade(descr
.replaceAll(";",""), defaultAssociation
);
344 derivedUnitBase
= derivedUnitFacade
.innerDerivedUnit();
345 // System.out.println("derivedUnitBase2: "+derivedUnitBase);
// Gathering event built from locality, collector and coordinates.
348 unitsGatheringEvent
= new UnitsGatheringEvent(importer
.getTermService(), locality
,collector
,longitude
, latitude
,
349 state2
.getConfig(),importer
.getAgentService());
352 unitsGatheringEvent
.setGatheringDate(tp
);
// Gathering area: country first, then the state/province as an additional named area when present.
356 unitsGatheringArea
= new UnitsGatheringArea();
357 unitsGatheringArea
.setParams(null, country
, state2
.getConfig(), importer
.getTermService(), importer
.getOccurrenceService());
359 if (StringUtils
.isNotBlank(stateprov
)){
360 Map
<String
, String
> namedAreas
= new HashMap
<String
, String
>();
361 namedAreas
.put(stateprov
, null);
362 unitsGatheringArea
.setAreaNames(namedAreas
, state2
.getConfig(), importer
.getTermService(), importer
.getVocabularyService());
365 areaCountry
= unitsGatheringArea
.getCountry();
368 // unitsGatheringArea = new UnitsGatheringArea(namedAreaList,dataHolder.getTermService());
369 // ArrayList<DefinedTermBase> nas = unitsGatheringArea.getAreas();
370 // for (DefinedTermBase namedArea : nas) {
371 // unitsGatheringEvent.addArea(namedArea);
374 // copy gathering event to facade
375 GatheringEvent gatheringEvent
= unitsGatheringEvent
.getGatheringEvent();
376 derivedUnitFacade
.setGatheringEvent(gatheringEvent
);
377 derivedUnitFacade
.setLocality(gatheringEvent
.getLocality());
378 derivedUnitFacade
.setExactLocation(gatheringEvent
.getExactLocation());
379 derivedUnitFacade
.setCollector(gatheringEvent
.getCollector());
// Unchecked down-casts: areaCountry and the collecting areas are assumed to be NamedArea instances.
380 derivedUnitFacade
.setCountry((NamedArea
)areaCountry
);
382 for(DefinedTermBase
<?
> area
:unitsGatheringArea
.getAreas()){
383 derivedUnitFacade
.addCollectingArea((NamedArea
) area
);
385 // derivedUnitFacade.addCollectingAreas(unitsGatheringArea.getAreas());
388 if (fieldNumber
!= null) {
389 derivedUnitFacade
.setFieldNumber(fieldNumber
);
391 specimenOrObservation
.setDerivedUnitBase(derivedUnitBase
);
393 return specimenOrObservation
;
// Maps a type-status key (abbreviation or full English name, matched case-insensitively) to the
// corresponding SpecimenTypeDesignationStatus term.
// String.matches requires the WHOLE key to match the pattern, so surrounding text or whitespace
// prevents a match.
// NOTE(review): the parameter list, the first branch and the final fallback are on lines not visible here.
397 private SpecimenTypeDesignationStatus
getSpecimenTypeDesignationStatusByKey(
401 } else if (key
.matches("(?i)(T|Type)")) {
402 return SpecimenTypeDesignationStatus
.TYPE();
403 } else if (key
.matches("(?i)(HT|Holotype)")) {
404 return SpecimenTypeDesignationStatus
.HOLOTYPE();
405 } else if (key
.matches("(?i)(LT|Lectotype)")) {
406 return SpecimenTypeDesignationStatus
.LECTOTYPE();
407 } else if (key
.matches("(?i)(NT|Neotype)")) {
408 return SpecimenTypeDesignationStatus
.NEOTYPE();
409 } else if (key
.matches("(?i)(ST|Syntype)")) {
410 return SpecimenTypeDesignationStatus
.SYNTYPE();
411 } else if (key
.matches("(?i)(ET|Epitype)")) {
412 return SpecimenTypeDesignationStatus
.EPITYPE();
413 } else if (key
.matches("(?i)(IT|Isotype)")) {
414 return SpecimenTypeDesignationStatus
.ISOTYPE();
415 } else if (key
.matches("(?i)(ILT|Isolectotype)")) {
416 return SpecimenTypeDesignationStatus
.ISOLECTOTYPE();
417 } else if (key
.matches("(?i)(INT|Isoneotype)")) {
418 return SpecimenTypeDesignationStatus
.ISONEOTYPE();
419 } else if (key
.matches("(?i)(IET|Isoepitype)")) {
420 return SpecimenTypeDesignationStatus
.ISOEPITYPE();
// "PT" is claimed by this Paratype branch (see the Phototype branch further down).
421 } else if (key
.matches("(?i)(PT|Paratype)")) {
422 return SpecimenTypeDesignationStatus
.PARATYPE();
423 } else if (key
.matches("(?i)(PLT|Paralectotype)")) {
424 return SpecimenTypeDesignationStatus
.PARALECTOTYPE();
425 } else if (key
.matches("(?i)(PNT|Paraneotype)")) {
426 return SpecimenTypeDesignationStatus
.PARANEOTYPE();
// NOTE(review): the '.' in "unsp." is an unescaped regex metacharacter, so any five-character key
// starting with "unsp" matches (e.g. "unspx"), not only the literal "unsp.".
427 } else if (key
.matches("(?i)(unsp.|Unspecified)")) {
428 return SpecimenTypeDesignationStatus
.UNSPECIFIC();
429 } else if (key
.matches("(?i)(2LT|Second Step Lectotype)")) {
430 return SpecimenTypeDesignationStatus
.SECOND_STEP_LECTOTYPE();
431 } else if (key
.matches("(?i)(2NT|Second Step Neotype)")) {
432 return SpecimenTypeDesignationStatus
.SECOND_STEP_NEOTYPE();
433 } else if (key
.matches("(?i)(OM|Original Material)")) {
434 return SpecimenTypeDesignationStatus
.ORIGINAL_MATERIAL();
435 } else if (key
.matches("(?i)(IcT|Iconotype)")) {
436 return SpecimenTypeDesignationStatus
.ICONOTYPE();
// NOTE(review): the "PT" alternative is unreachable here because the Paratype branch above already
// returns for "PT"; only the full word "Phototype" can reach this branch.
437 } else if (key
.matches("(?i)(PT|Phototype)")) {
438 return SpecimenTypeDesignationStatus
.PHOTOTYPE();
439 } else if (key
.matches("(?i)(IST|Isosyntype)")) {
440 return SpecimenTypeDesignationStatus
.ISOSYNTYPE();
// Creates a DerivedUnitFacade whose SpecimenOrObservationType is guessed from keywords found in the
// lower-cased recordBasis text ("specimen"/"type", "observation", "fossil", "living").
//   recordBasis  - free text describing the record; may be null (logged at info level)
//   defaultAssoc - not used in the lines visible here; presumably the fallback type - TODO confirm
// NOTE(review): else/fallback lines are missing from this view, so it cannot be told from here whether
// the keyword tests are chained or independent, nor what 'type' is when nothing matches.
445 protected DerivedUnitFacade
getFacade(String recordBasis
, SpecimenOrObservationType defaultAssoc
) {
446 // System.out.println("getFacade() for "+recordBasis+", defaultassociation: "+defaultAssoc);
447 SpecimenOrObservationType type
= null;
450 if (recordBasis
!= null) {
451 String recordBasisL
= recordBasis
.toLowerCase();
// NOTE(review): startsWith("specimen") is redundant - contains("specimen") already covers it.
452 if (recordBasisL
.startsWith("specimen") || recordBasisL
.contains("specimen") || recordBasisL
.contains("type")) {// specimen
453 type
= SpecimenOrObservationType
.PreservedSpecimen
;
455 if (recordBasisL
.startsWith("observation")) {
456 type
= SpecimenOrObservationType
.Observation
;
458 if (recordBasisL
.contains("fossil")) {
459 type
= SpecimenOrObservationType
.Fossil
;
462 if (recordBasisL
.startsWith("living")) {
463 type
= SpecimenOrObservationType
.LivingSpecimen
;
466 logger
.info("The basis of record does not seem to be known: *" + recordBasisL
+"*");
471 logger
.info("The basis of record is null");
474 DerivedUnitFacade derivedUnitFacade
= DerivedUnitFacade
.NewInstance(type
);
475 return derivedUnitFacade
;
// Chooses the description Feature matching the record basis of a specimen/observation:
// observation types -> Feature.OBSERVATION, preserved/living/other specimens -> Feature.SPECIMEN,
// Unknown or DerivedUnit -> Feature.INDIVIDUALS_ASSOCIATION; anything else is logged as a warning.
// NOTE(review): lines before the getRecordBasis() call and the final return are not visible here;
// type.isFeatureObservation() would fail on a null record basis unless guarded there.
480 @SuppressWarnings("rawtypes")
481 protected Feature
makeFeature(SpecimenOrObservationBase unit
) {
485 SpecimenOrObservationType type
= unit
.getRecordBasis();
487 if (type
.isFeatureObservation()){
488 return Feature
.OBSERVATION();
489 }else if (type
.isPreservedSpecimen() ||
490 type
== SpecimenOrObservationType
.LivingSpecimen
||
491 type
== SpecimenOrObservationType
.OtherSpecimen
493 return Feature
.SPECIMEN();
494 }else if (type
== SpecimenOrObservationType
.Unknown
||
495 type
== SpecimenOrObservationType
.DerivedUnit
497 return Feature
.INDIVIDUALS_ASSOCIATION();
499 logger
.warn("No feature defined for derived unit class: "
500 + unit
.getClass().getSimpleName());
// Separator constant (a single comma). It is not referenced in the part of the file visible here.
505 protected final static String SPLITTER
= ",";
// Console prompt: logs the question at info level and reads one integer from standard input.
// Scanner.nextInt() throws InputMismatchException when the next token is not an integer.
// The Scanner is not closed in the visible lines (closing it would also close System.in).
// NOTE(review): the return statement is on a line not visible here; presumably 'index' is returned.
508 protected int askQuestion(String question
){
509 Scanner scan
= new Scanner(System
.in
);
510 logger
.info(question
);
511 int index
= scan
.nextInt();
// Creates an empty Reference of the kind selected by reftype, using the ReferenceFactory methods
// below (generic, book, article, book section, journal, print series, thesis).
// NOTE(review): the switch/case labels that map reftype values to these branches, the default branch
// and the return statement are on lines not visible here.
520 protected Reference
<?
> getReferenceWithType(int reftype
) {
521 Reference
<?
> ref
= null;
524 ref
= ReferenceFactory
.newGeneric();
// newBook() is typed as IBook, hence the explicit cast back to Reference.
527 IBook tmp
= ReferenceFactory
.newBook();
528 ref
= (Reference
<?
>)tmp
;
531 ref
= ReferenceFactory
.newArticle();
// Same cast pattern for the IBookSection returned by newBookSection().
534 IBookSection tmp2
= ReferenceFactory
.newBookSection();
535 ref
= (Reference
<?
>)tmp2
;
538 ref
= ReferenceFactory
.newJournal();
541 ref
= ReferenceFactory
.newPrintSeries();
544 ref
= ReferenceFactory
.newThesis();
// Loads all persons known to the agent service, indexes them by title cache, creates Person entries
// for collector names that are not yet known, and stores the resulting title-cache -> Person map in
// the import configuration via state.getConfig().setPersons(...).
//   state        - import state whose configuration receives the person map
//   agentService - service used to look up and save persons
// NOTE(review): some statements (embedded numbers 558, 588-591) are not visible in this view.
555 protected void prepareCollectors(TaxonXImportState state
,IAgentService agentService
) {
556 // logger.info("PREPARE COLLECTORS");
// NOTE(review): 'collectors' is empty in the visible lines, so 'collectorsU' (its de-duplicated copy)
// would be empty too unless names are added on a line not shown.
557 List
<String
> collectors
= new ArrayList
<String
>();
559 List
<String
> collectorsU
= new ArrayList
<String
>(new HashSet
<String
>(collectors
));
560 Set
<UUID
> uuids
= new HashSet
<UUID
>();
562 //existing persons in DB
563 List
<UuidAndTitleCache
<Person
>> hiberPersons
= agentService
.getPersonUuidAndTitleCache();
564 Map
<String
,Person
> titleCachePerson
= new HashMap
<String
, Person
>();
// 'uuids' is re-assigned here although it was already initialised a few lines above.
565 uuids
= new HashSet
<UUID
>();
566 for (UuidAndTitleCache
<Person
> hibernateP
:hiberPersons
){
567 uuids
.add(hibernateP
.getUuid());
// Load the full agents for the collected UUIDs and index them by title cache.
570 if (!uuids
.isEmpty()){
571 List
<AgentBase
> existingPersons
= agentService
.find(uuids
);
572 for (AgentBase existingP
:existingPersons
){
573 titleCachePerson
.put(existingP
.getTitleCache(),CdmBase
.deproxy(existingP
, Person
.class));
// Title cache -> UUID lookup, used below to detect collectors that already exist.
577 Map
<String
,UUID
> personMap
= new HashMap
<String
, UUID
>();
578 for (UuidAndTitleCache
<Person
> person
:hiberPersons
){
579 personMap
.put(person
.getTitleCache(), person
.getUuid());
582 java
.util
.Collection
<AgentBase
> personToadd
= new ArrayList
<AgentBase
>();
// A Person is instantiated for every collector name; what happens to unknown ones is on lines not
// visible here (presumably they are added to personToadd - TODO confirm).
584 for (String collector
:collectorsU
){
585 Person p
= Person
.NewInstance();
586 p
.setTitleCache(collector
,true);
587 if (!personMap
.containsKey(p
.getTitleCache())){
// Save the new persons and add them to the title-cache index as well.
592 if(!personToadd
.isEmpty()){
593 Map
<UUID
, AgentBase
> uuuidPerson
= agentService
.save(personToadd
);
594 for (UUID u
:uuuidPerson
.keySet()){
595 titleCachePerson
.put(uuuidPerson
.get(u
).getTitleCache(), CdmBase
.deproxy(uuuidPerson
.get(u
), Person
.class));
599 state
.getConfig().setPersons(titleCachePerson
);
// Opens a Swing input dialog asking the user to complete a reference or name that could not be parsed.
//   name     - the text that caused the parsing problems
//   problems - parser problems, shown in the dialog joined with "--"
// JOptionPane.showInputDialog returns null when the user cancels.
// NOTE(review): the remaining showInputDialog arguments and the return statement are not visible here.
606 protected String
getFullReference(String name
, List
<ParserProblem
> problems
) {
607 // logger.info("getFullReference for "+ name);
608 JTextArea textArea
= new JTextArea("Complete the reference or the name '"+name
+"'.\nThe current problem is "+StringUtils
.join(problems
,"--"));
609 JScrollPane scrollPane
= new JScrollPane(textArea
);
610 textArea
.setLineWrap(true);
611 textArea
.setWrapStyleWord(true);
612 scrollPane
.setPreferredSize( new Dimension( 700, 70 ) );
614 // JFrame frame = new JFrame("I have a question");
615 // frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
616 String s
= (String
)JOptionPane
.showInputDialog(
619 "Get full reference or name",
620 JOptionPane
.PLAIN_MESSAGE
,
632 * @throws TransformerException
633 * @throws TransformerFactoryConfigurationError
// Asks the user which scientific name to use when the free-text name and the atomised (XML tag) name
// disagree. Answers are cached in namesAsked under fullname + "_" + atomised, so the same pair is only
// asked once.
//   fullname           - name as found in the free text
//   atomised           - name assembled from the XML tags
//   classificationName - shown in the dialog; the dialog path is tied to names containing "Ants"
//   fullParagraph      - XML node rendered with formatNode(...) for context
// Declares TransformerFactoryConfigurationError / TransformerException because formatNode is called.
// NOTE(review): the default-name selection, the non-"Ants" path and the return are on lines not visible here.
635 protected String
askWhichScientificName(String fullname
,String atomised
,String classificationName
, Node fullParagraph
) throws TransformerFactoryConfigurationError
, TransformerException
{
636 // logger.info("getScientificName for "+ fullname);
637 // JFrame frame = new JFrame("I have a question");
638 // frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
639 String k
= fullname
+"_"+atomised
;
641 String defaultN
= "";
642 if (atomised
.length()>fullname
.length()) {
// Cache hit: reuse the earlier answer without opening a dialog.
648 if (namesAsked
.containsKey(k
)){
649 return namesAsked
.get(k
);
652 //activate it for ants because a lot of markup is incomplete
653 if (classificationName
.indexOf("Ants")>-1) {
657 JTextArea textArea
= new JTextArea("The names in the free text and in the xml tags do not match : "+fullname
+
658 ", or "+atomised
+"\n"+formatNode(fullParagraph
));
659 JScrollPane scrollPane
= new JScrollPane(textArea
);
660 textArea
.setLineWrap(true);
661 textArea
.setWrapStyleWord(true);
662 scrollPane
.setPreferredSize( new Dimension( 700, 200 ) );
663 String s
= (String
)JOptionPane
.showInputDialog(
666 "Which name do I have to use? The current classification is "+classificationName
,
667 JOptionPane
.PLAIN_MESSAGE
,
// NOTE(review): s is null when the dialog is cancelled; in that case null is what gets cached.
671 namesAsked
.put(k
, s
);
// Yes/No dialog asking whether a parent taxon should be added for the taxon named s.
// The button labels come from the current look-and-feel (UIManager option-pane texts).
// NOTE(review): the remaining showOptionDialog arguments and the return statement are not visible
// here; presumably the dialog result 'addTaxon' is returned.
677 protected int askAddParent(String s
){
678 // boolean hack=true;
682 JTextArea textArea
= new JTextArea("If you want to add a parent taxa for "+s
+", click \"Yes\"." +
683 " If it is a root for this classification, click \"No\" or \"Cancel\".");
684 JScrollPane scrollPane
= new JScrollPane(textArea
);
685 textArea
.setLineWrap(true);
686 textArea
.setWrapStyleWord(true);
687 scrollPane
.setPreferredSize( new Dimension( 600, 70 ) );
689 Object
[] options
= { UIManager
.getString("OptionPane.yesButtonText"),
690 UIManager
.getString("OptionPane.noButtonText")};
693 int addTaxon
= JOptionPane
.showOptionDialog(null,
696 JOptionPane
.YES_NO_OPTION
,
// Input dialog asking for the name of the first parent taxon of s.
// JOptionPane.showInputDialog returns null when the user cancels.
// NOTE(review): the remaining dialog arguments and the return statement are not visible here.
704 protected String
askSetParent(String s
){
705 JTextArea textArea
= new JTextArea("What is the first taxon parent for "+s
+"?\n"+
706 "The rank will be asked later. ");
707 JScrollPane scrollPane
= new JScrollPane(textArea
);
708 textArea
.setLineWrap(true);
709 textArea
.setWrapStyleWord(true);
710 scrollPane
.setPreferredSize( new Dimension( 700, 200 ) );
712 String s2
= (String
)JOptionPane
.showInputDialog(
716 JOptionPane
.PLAIN_MESSAGE
,
// Selection dialog asking for the rank of s; rankListStr supplies the selectable values.
// JOptionPane.showInputDialog returns null when the user cancels.
// NOTE(review): the remaining dialog arguments and the return statement are not visible here.
723 protected String
askRank(String s
, List
<String
> rankListStr
){
724 JTextArea textArea
= new JTextArea("What is the rank for "+s
+"?");
725 JScrollPane scrollPane
= new JScrollPane(textArea
);
726 textArea
.setLineWrap(true);
727 textArea
.setWrapStyleWord(true);
728 scrollPane
.setPreferredSize( new Dimension( 700, 200 ) );
730 String r
= (String
)JOptionPane
.showInputDialog(
734 JOptionPane
.PLAIN_MESSAGE
,
736 rankListStr
.toArray(),
744 * @throws TransformerException
745 * @throws TransformerFactoryConfigurationError
// Input dialog asking how a new feature should be named; the paragraph text is shown for context.
// The @throws residue just above this method does not match the signature, which declares no exceptions.
// JOptionPane.showInputDialog returns null when the user cancels.
// NOTE(review): the remaining dialog arguments and the return statement are not visible here.
747 protected String
askFeatureName(String paragraph
){
748 // logger.info("getScientificName for "+ fullname);
749 // JFrame frame = new JFrame("I have a question");
750 // frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
751 JTextArea textArea
= new JTextArea("How should the feature be named? \n"+paragraph
);
752 JScrollPane scrollPane
= new JScrollPane(textArea
);
753 textArea
.setLineWrap(true);
754 textArea
.setWrapStyleWord(true);
755 scrollPane
.setPreferredSize( new Dimension( 700, 200 ) );
756 String s
= (String
)JOptionPane
.showInputDialog(
760 JOptionPane
.PLAIN_MESSAGE
,
768 * @param taxonnamebase2
769 * @param bestMatchingTaxon
771 * @param similarityAuthor
// Decides whether an existing taxon (bestMatchingTaxon) should be reused for a newly parsed name.
// A default answer is derived from the name similarity, the author similarity and from whether the
// sec/source references agree; a Yes/No dialog is built for the remaining cases.
//   taxonnamebase2    - the newly parsed name (shown in the dialog)
//   bestMatchingTaxon - best candidate found in the database
//   refMods           - current source reference; its title cache is compared with the candidate's
//   similarityScore   - name similarity; thresholds used below: 0.65/0.66, 0.9999 and exactly 1
//   similarityAuthor  - author similarity; -1 is accepted alongside values above 0.8
// NOTE(review): many return statements, else lines and closing braces are on lines not visible here,
// so the exact outcome of each branch cannot be read from this view.
774 protected boolean askIfReuseBestMatchingTaxon(NonViralName
<?
> taxonnamebase2
, Taxon bestMatchingTaxon
, Reference
<?
> refMods
, double similarityScore
, double similarityAuthor
) {
775 Object
[] options
= { UIManager
.getString("OptionPane.yesButtonText"),
776 UIManager
.getString("OptionPane.noButtonText")};
// Both similarities are low: see the commented hint below ("should say NO").
778 if (similarityScore
<0.66 && similarityAuthor
<0.5) {
780 // System.out.println("should say NO");
783 boolean sameSource
=false;
786 String sec
= refMods
.getTitleCache();
// NOTE(review): a missing sec reference is detected by catching NullPointerException rather than by a null check.
789 secBest
=bestMatchingTaxon
.getSec().getTitleCache();
791 catch(NullPointerException e
){
792 logger
.warn("no sec - ignore");
795 if (secBest
.isEmpty()) {
// options[0] is the "Yes" label, options[1] the "No" label; "No" is the initial default.
799 Object defaultOption
=options
[1];
800 if(sec
.equalsIgnoreCase(secBest
)
801 // || taxonnamebase2.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(bestMatchingTaxon.getTitleCache().split("sec.")[0].trim())
803 //System.out.println(sec+" and "+secBest);
// NOTE(review): similarityAuthor==-1 and similarityScore==1 are exact comparisons on doubles.
806 if (similarityScore
>0.65 && (similarityAuthor
==-1 || similarityAuthor
>0.8)) {
807 defaultOption
=options
[0];
809 defaultOption
=options
[1];
812 if (similarityScore
>0.65 && similarityAuthor
>0.8) {
813 if(similarityScore
==1 ) {
816 defaultOption
=options
[0];
818 defaultOption
=options
[1];
// Collect the citations of the candidate's sources for display and compare each with the current sec.
822 String sourcesStr
="";
824 Set
<IdentifiableSource
> sources
= bestMatchingTaxon
.getSources();
825 for (IdentifiableSource src
:sources
){
827 String srcSec
=src
.getCitation().getTitleCache();
828 if(!srcSec
.isEmpty()){
829 sourcesStr
+="\n "+srcSec
;
830 if (srcSec
.equalsIgnoreCase(sec
)){
832 if (similarityScore
>0.65 && similarityAuthor
>0.8) {
833 defaultOption
=options
[0];
835 defaultOption
=options
[1];
840 logger
.warn("the source reference is maybe null, just ignore it.");
844 if (sameSource
&& similarityScore
>0.9999 && (similarityAuthor
==-1 || similarityAuthor
>0.8)) {
847 if(similarityScore
<0.66) {
848 defaultOption
=options
[1];
851 // //only activate it if you know the data you are importing (ok for Chenopodium)
// Reference (==) comparison is sufficient here: both sides are the same array element.
852 if(defaultOption
==options
[1]) {
// Build the dialog text, listing the current sources when there are any.
856 JTextArea textArea
=null;
857 if (!sourcesStr
.isEmpty()) {
858 textArea
= new JTextArea("Does "+taxonnamebase2
.toString()+" correspond to "
859 + bestMatchingTaxon
.toString()+" ?\n Click \"Yes\". if it does, click \"No\" if it does not."
860 + "\n The current sources are:"+ sourcesStr
);
862 textArea
= new JTextArea("Does "+taxonnamebase2
.toString()+" correspond to "
863 + bestMatchingTaxon
.toString()+" ?\n Click \"Yes\". if it does, click \"No\" if it does not.");
865 JScrollPane scrollPane
= new JScrollPane(textArea
);
866 textArea
.setLineWrap(true);
867 textArea
.setWrapStyleWord(true);
868 scrollPane
.setPreferredSize( new Dimension( 600, 70 ) );
870 int addTaxon
= JOptionPane
.showOptionDialog(null,
873 JOptionPane
.YES_NO_OPTION
,
886 * @param fullLineRefName
// Input dialog asking whether fullLineRefName contains a scientific name.
// Expected answers per the dialog text: 0 = contains a name, 1 = only a reference, 2 = ignore.
// NOTE(review): Integer.valueOf(s) throws NumberFormatException for non-numeric input and also when
// s is null (dialog cancelled); any guard would be on lines not visible here.
889 protected int askIfNameContained(String fullLineRefName
) {
891 JTextArea textArea
= new JTextArea("Is a scientific name contained in this sentence ? Type 0 if contains a name, 1 if it's only a reference. Press 2 if it's to be ignored \n"+fullLineRefName
);
892 JScrollPane scrollPane
= new JScrollPane(textArea
);
893 textArea
.setLineWrap(true);
894 textArea
.setWrapStyleWord(true);
895 scrollPane
.setPreferredSize( new Dimension( 600, 400 ) );
897 String s
= (String
)JOptionPane
.showInputDialog(
901 JOptionPane
.PLAIN_MESSAGE
,
905 return Integer
.valueOf(s
);
// Asks the user for the correct rank of fullname, offering every Rank known to the term service.
// Answers are cached in ranksAsked under the full name, so each name is only asked once.
//   fullname          - the name whose rank is unclear
//   rank              - rank extracted from the TaxonX file, shown in the dialog title
//   nomenclaturalCode - passed to Rank.getRankByEnglishName to resolve the chosen label
// NOTE(review): the declaration of 'cR', the try line and the return are on lines not visible here.
913 protected Rank
askForRank(String fullname
,Rank rank
, NomenclaturalCode nomenclaturalCode
) {
914 // logger.info("askForRank for "+ fullname+ ", "+rank);
915 // JFrame frame = new JFrame("I have a question");
916 // frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
// Cache hit: reuse the earlier answer without opening a dialog.
918 if (ranksAsked
.containsKey(fullname
)){
919 return ranksAsked
.get(fullname
);
930 JTextArea textArea
= new JTextArea("What is the correct rank for "+fullname
+"?");
931 JScrollPane scrollPane
= new JScrollPane(textArea
);
932 textArea
.setLineWrap(true);
933 textArea
.setWrapStyleWord(true);
934 scrollPane
.setPreferredSize( new Dimension( 600, 50 ) );
// The freshly created ArrayList is discarded immediately by the assignment that follows.
936 List
<Rank
> rankList
= new ArrayList
<Rank
>();
937 rankList
= importer
.getTermService().list(Rank
.class, null, null, null, null);
939 List
<String
> rankListStr
= new ArrayList
<String
>();
940 for (Rank r
:rankList
) {
941 rankListStr
.add(r
.toString());
943 String s
= (String
)JOptionPane
.showInputDialog(
// NOTE(review): rank.toString() fails with a NullPointerException when rank is null.
946 "The rank extracted from the TaxonX file is "+rank
.toString(),
947 JOptionPane
.PLAIN_MESSAGE
,
949 rankListStr
.toArray(),
// Resolve the selected label back to a Rank; an unknown label is only logged.
955 cR
= Rank
.getRankByEnglishName(s
,nomenclaturalCode
,true);
957 } catch (UnknownCdmTypeException e
) {
958 logger
.warn("Unknown rank ?!"+s
);
962 ranksAsked
.put(fullname
,cR
);
969 * ask user to specify what kind of paragraph the current "multiple" section is
970 * default possibilities are "synonyms","material examined","distribution","image caption","other"
971 * could make sense to replace this list with the CDM-Feature list
972 * if "other" is selected, a second pop-up will be prompted to ask user to specify a new Feature name.
973 * @param fullParagraph : the current Node
974 * @return the section name
// Asks the user which category a "multiple" paragraph belongs to. The choices are the entries of the
// 'possiblities' array below; choosing "new category" opens a second dialog (askFeatureName) for a
// new feature name.
//   fullParagraph - the current XML node, rendered with formatNode(...) for display
// NOTE(review): the declaration of 'fp', the try lines, the remaining dialog arguments and the return
// statement are on lines not visible here.
976 protected String
askMultiple(Node fullParagraph
){
979 fp
= formatNode(fullParagraph
);
// NOTE(review): formatting failures are only printed via printStackTrace() instead of the class logger.
980 } catch (TransformerFactoryConfigurationError e1
) {
981 // TODO Auto-generated catch block
982 e1
.printStackTrace();
983 } catch (TransformerException e1
) {
984 // TODO Auto-generated catch block
985 e1
.printStackTrace();
987 JTextArea textArea
= new JTextArea("What category is it for this paragraph \n"+fp
);
988 JScrollPane scrollPane
= new JScrollPane(textArea
);
989 textArea
.setLineWrap(true);
990 textArea
.setWrapStyleWord(true);
991 scrollPane
.setPreferredSize( new Dimension( 600, 400 ) );
993 String
[] possiblities
= {"synonyms","material examined","distribution","image caption","Other","vernacular name","type status","new category"};
996 String s
= (String
)JOptionPane
.showInputDialog(
1000 JOptionPane
.PLAIN_MESSAGE
,
// NOTE(review): showInputDialog returns null on cancel; s.equalsIgnoreCase would then throw a
// NullPointerException unless a guard exists on a line not visible here.
1005 if (s
.equalsIgnoreCase("new category")) {
1007 s
=askFeatureName(formatNode(fullParagraph
));
1008 } catch (TransformerFactoryConfigurationError e
) {
1010 } catch (TransformerException e
) {
1021 * asks for the hierarchical parent, based on the current classification
1023 * @param classification
1024 * @return Taxon, the parent Taxon
// Asks the user to pick the parent of 'taxon' among all taxa of the given classification.
// The choices are "<title cache> - <rank>" strings, sorted, with "Not here!" as the first entry.
//   taxon          - the taxon whose parent is wanted (its title cache is shown in the question)
//   classification - source of the candidate taxa; its title cache is the dialog title
// NOTE(review): the remaining dialog arguments and the return statement are not visible here.
1026 protected Taxon
askParent(Taxon taxon
,Classification classification
) {
1027 // System.out.println("ASK PARENT "+classification);
1028 // logger.info("ask Parent "+taxon.getTitleCache());
1029 Set
<TaxonNode
> allNodes
= classification
.getAllNodes();
// NOTE(review): keyed by title cache - two taxa with the same title cache overwrite each other.
1030 Map
<String
,Taxon
> nodesMap
= new HashMap
<String
, Taxon
>();
1032 for (TaxonNode tn
:allNodes
){
1033 Taxon t
= tn
.getTaxon();
1034 nodesMap
.put(t
.getTitleCache(), t
);
1036 List
<String
> nodeList
= new ArrayList
<String
>();
1037 for (String nl
: nodesMap
.keySet()) {
1038 nodeList
.add(nl
+" - "+nodesMap
.get(nl
).getName().getRank());
1040 Collections
.sort(nodeList
);
1041 nodeList
.add(0, "Not here!");
// NOTE(review): a JFrame configured with EXIT_ON_CLOSE is created here; whether it is passed to the
// dialog is on a line not visible here.
1043 JFrame frame
= new JFrame("I have a question");
1044 frame
.setDefaultCloseOperation(JFrame
.EXIT_ON_CLOSE
);
1045 String s
= (String
)JOptionPane
.showInputDialog(
1047 "What is the taxon parent for "+taxon
.getTitleCache()+"?",
1048 "The current classification is "+classification
.getTitleCache(),
1049 JOptionPane
.PLAIN_MESSAGE
,
// The chosen entry is mapped back to a taxon via the text before the first " - ".
// NOTE(review): a title cache that itself contains " - " would not be found; "Not here!" yields null
// from the map; s is null when the dialog is cancelled, which would make s.split fail.
1054 Taxon returnTaxon
= nodesMap
.get(s
.split(" - ")[0]);
1055 // logger.info("ask Parent returns "+s);
1062 * @param r: the rank as string (with dwc tags)
1063 * @return Rank : the Rank object corresponding to the current string
1066 protected Rank
getRank(String r
){
1068 r
=Rank
.UNKNOWN_RANK().toString();
1070 r
=r
.replace("dwcranks:", "");
1071 r
=r
.replace("dwc:","");
1073 Rank rank
= Rank
.UNKNOWN_RANK();
1074 if (r
.equalsIgnoreCase("Superfamily")) {
1075 rank
=Rank
.SUPERFAMILY();
1077 else if (r
.equalsIgnoreCase("Family")) {
1080 else if (r
.equalsIgnoreCase("Subfamily")) {
1081 rank
=Rank
.SUBFAMILY();
1083 else if (r
.equalsIgnoreCase("Tribe")) {
1086 else if (r
.equalsIgnoreCase("Subtribe")) {
1087 rank
=Rank
.SUBTRIBE();
1089 else if (r
.equalsIgnoreCase("Genus")) {
1092 else if (r
.equalsIgnoreCase("Subgenus")) {
1093 rank
=Rank
.SUBGENUS();
1095 else if (r
.equalsIgnoreCase("Section")) {
1096 rank
=Rank
.SECTION_BOTANY();
1098 else if (r
.equalsIgnoreCase("Subsection")) {
1099 rank
=Rank
.SUBSECTION_BOTANY();
1101 else if (r
.equalsIgnoreCase("Series")) {
1104 else if (r
.equalsIgnoreCase("Subseries")) {
1105 rank
=Rank
.SUBSERIES();
1107 else if (r
.equalsIgnoreCase("Species")) {
1108 rank
=Rank
.SPECIES();
1110 else if (r
.equalsIgnoreCase("Subspecies")) {
1111 rank
=Rank
.SUBSPECIES();
1113 else if (r
.equalsIgnoreCase("Variety") || r
.equalsIgnoreCase("varietyEpithet")) {
1114 rank
=Rank
.VARIETY();
1116 else if (r
.equalsIgnoreCase("Subvariety")) {
1117 rank
=Rank
.SUBVARIETY();
1119 else if (r
.equalsIgnoreCase("Form")) {
1122 else if (r
.equalsIgnoreCase("Subform")) {
1123 rank
=Rank
.SUBFORM();
1124 }else if (r
.equalsIgnoreCase("higher")) {
1125 // rank=Rank.SUPRAGENERICTAXON();
1126 logger
.warn("handling of 'higher' rank still unclear");
1134 * @param ato: atomised taxon name data
1135 * @return rank present in the xmldata fields
1137 protected Rank
getRank(Map
<String
, String
> ato
) {
1138 Rank rank
=Rank
.UNKNOWN_RANK();
1143 if (ato
.containsKey("dwc:family")){
1146 if (ato
.containsKey("dwc:tribe") || ato
.containsKey("dwcranks:tribe")){
1149 if (ato
.containsKey("dwc:genus")) {
1152 if (ato
.containsKey("dwc:subgenus")) {
1153 rank
= Rank
.SUBGENUS();
1155 if (ato
.containsKey("dwc:specificepithet") || ato
.containsKey("dwc:species")) {
1156 rank
= Rank
.SPECIES();
1158 if (ato
.containsKey("dwc:infraspecificepithet")) {
1159 rank
= Rank
.INFRASPECIES();
1161 if (ato
.containsKey("dwcranks:varietyepithet")) {
1162 rank
=Rank
.VARIETY();
1164 //popUp(rank.getTitleCache());
1169 * Format a XML node for a clean (screen) output with tags
1170 * @param Node : the node to format
1171 * @return String : the XML section formated for a screen output
1174 protected String
formatNode(Node node
) throws TransformerFactoryConfigurationError
, TransformerException
{
1175 Transformer transformer
= TransformerFactory
.newInstance().newTransformer();
1176 transformer
.setOutputProperty(OutputKeys
.INDENT
, "yes");
1177 //initialize StreamResult with File object to save to file
1178 StreamResult result
= new StreamResult(new StringWriter());
1179 DOMSource source
= new DOMSource(node
);
1180 transformer
.transform(source
, result
);
1181 String xmlString
= result
.getWriter().toString();
1185 protected boolean containsDistinctLetters(String word
){
1186 Set
<Character
> dl
= new HashSet
<Character
>();
1187 for (char a
: word
.toCharArray()) {
1190 if(dl
.size()>1 && word
.indexOf("no description text")==-1) {
1198 * Tries to match the status string against any new name status
1199 * and returns the status if it matches. Returns <code>null</code> otherwise.
// Checks a status string against a regular expression describing "new name"
// markers such as "sp. nov.", "gen. nov.", "comb. nov.", "new species",
// "new combination", "n. sp." and "sp. n.".
// status must not be null: trim() is called on it without a prior check.
// NOTE(review): the end of the pattern and the return statements of both
// branches are not visible in this listing - confirm what is actually returned.
1203 protected String
newNameStatus(String status
){
// The alternatives are joined with '|' inside one outer group.
// In the first alternative the prefix may be empty (trailing '|' in the inner
// group) and the last '.' of "nov." is not escaped, so it accepts any character.
1204 String pattern
= "(" + "((sp|spec|gen|comb|)\\.\\s*nov.)" +
1205 "|(new\\s*(species|combination))" +
1206 "|(n\\.\\s*sp\\.)" +
1207 "|(sp\\.\\s*n\\.)" +
// String.matches only succeeds if the whole trimmed status matches the
// pattern; a marker embedded in a longer text is not found.
1209 if (status
.trim().matches(pattern
)){
1219 /** Creates an cdm-NomenclaturalCode by the tcs NomenclaturalCode
1221 protected NomenclaturalStatusType
nomStatusString2NomStatus (String nomStatus
) throws UnknownCdmTypeException
{
1223 if (nomStatus
== null){ return null;
1224 }else if ("Valid".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.VALID();
1226 }else if ("Alternative".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.ALTERNATIVE();
1227 }else if ("nom. altern.".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.ALTERNATIVE();
1229 }else if ("Ambiguous".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.AMBIGUOUS();
1231 }else if ("Doubtful".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.DOUBTFUL();
1233 }else if ("Confusum".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.CONFUSUM();
1235 }else if ("Illegitimate".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.ILLEGITIMATE();
1236 }else if ("nom. illeg.".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.ILLEGITIMATE();
1238 }else if ("Superfluous".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.SUPERFLUOUS();
1239 }else if ("nom. superfl.".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.SUPERFLUOUS();
1241 }else if ("Rejected".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.REJECTED();
1242 }else if ("nom. rej.".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.REJECTED();
1244 }else if ("Utique Rejected".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.UTIQUE_REJECTED();
1246 }else if ("Conserved Prop".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.CONSERVED_PROP();
1248 }else if ("Orthography Conserved Prop".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.ORTHOGRAPHY_CONSERVED_PROP();
1250 }else if ("Legitimate".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.LEGITIMATE();
1252 }else if ("Novum".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.NOVUM();
1253 }else if ("nom. nov.".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.NOVUM();
1255 }else if ("Utique Rejected Prop".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.UTIQUE_REJECTED_PROP();
1257 }else if ("Orthography Conserved".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.ORTHOGRAPHY_CONSERVED();
1259 }else if ("Rejected Prop".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.REJECTED_PROP();
1261 }else if ("Conserved".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.CONSERVED();
1262 }else if ("nom. cons.".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.CONSERVED();
1264 }else if ("Sanctioned".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.SANCTIONED();
1266 }else if ("Invalid".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.INVALID();
1267 }else if ("nom. inval.".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.INVALID();
1269 }else if ("Nudum".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.NUDUM();
1270 }else if ("nom. nud.".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.NUDUM();
1272 }else if ("Combination Invalid".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.COMBINATION_INVALID();
1274 }else if ("Provisional".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.PROVISIONAL();
1275 }else if ("nom. provis.".equalsIgnoreCase(nomStatus
)){return NomenclaturalStatusType
.PROVISIONAL();
1278 throw new UnknownCdmTypeException("Unknown Nomenclatural status type " + nomStatus
);
1284 protected SpecimenTypeDesignationStatus
typeStatusId2TypeStatus (int typeStatusId
) throws UnknownCdmTypeException
{
1285 switch (typeStatusId
){
1286 case 0: return null;
1287 case 1: return SpecimenTypeDesignationStatus
.HOLOTYPE();
1288 case 2: return SpecimenTypeDesignationStatus
.LECTOTYPE();
1289 case 3: return SpecimenTypeDesignationStatus
.NEOTYPE();
1290 case 4: return SpecimenTypeDesignationStatus
.EPITYPE();
1291 case 5: return SpecimenTypeDesignationStatus
.ISOLECTOTYPE();
1292 case 6: return SpecimenTypeDesignationStatus
.ISONEOTYPE();
1293 case 7: return SpecimenTypeDesignationStatus
.ISOTYPE();
1294 case 8: return SpecimenTypeDesignationStatus
.PARANEOTYPE();
1295 case 9: return SpecimenTypeDesignationStatus
.PARATYPE();
1296 case 10: return SpecimenTypeDesignationStatus
.SECOND_STEP_LECTOTYPE();
1297 case 11: return SpecimenTypeDesignationStatus
.SECOND_STEP_NEOTYPE();
1298 case 12: return SpecimenTypeDesignationStatus
.SYNTYPE();
1299 case 21: return SpecimenTypeDesignationStatus
.ICONOTYPE();
1300 case 22: return SpecimenTypeDesignationStatus
.PHOTOTYPE();
1302 throw new UnknownCdmTypeException("Unknown TypeDesignationStatus (id=" + Integer
.valueOf(typeStatusId
).toString() + ")");