Project

General

Profile

Download (50.6 KB) Statistics
| Branch: | Tag: | Revision:
1
// $Id$
2
/**
3
 * Copyright (C) 2013 EDIT
4
 * European Distributed Institute of Taxonomy
5
 * http://www.e-taxonomy.eu
6
 *
7
 * The contents of this file are subject to the Mozilla Public License Version 1.1
8
 * See LICENSE.TXT at the top of this package for the full license terms.
9
 */
10
package eu.etaxonomy.cdm.io.taxonx2013;
11

    
12
import java.awt.Dimension;
13
import java.io.StringWriter;
14
import java.util.ArrayList;
15
import java.util.Collections;
16
import java.util.HashMap;
17
import java.util.HashSet;
18
import java.util.List;
19
import java.util.Map;
20
import java.util.Scanner;
21
import java.util.Set;
22
import java.util.UUID;
23

    
24
import javax.swing.JFrame;
25
import javax.swing.JOptionPane;
26
import javax.swing.JScrollPane;
27
import javax.swing.JTextArea;
28
import javax.swing.UIManager;
29
import javax.xml.transform.OutputKeys;
30
import javax.xml.transform.Transformer;
31
import javax.xml.transform.TransformerException;
32
import javax.xml.transform.TransformerFactory;
33
import javax.xml.transform.TransformerFactoryConfigurationError;
34
import javax.xml.transform.dom.DOMSource;
35
import javax.xml.transform.stream.StreamResult;
36

    
37
import org.apache.commons.lang.StringUtils;
38
import org.apache.log4j.Logger;
39
import org.w3c.dom.Node;
40
import org.w3c.dom.NodeList;
41

    
42
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
43
import eu.etaxonomy.cdm.api.service.IAgentService;
44
import eu.etaxonomy.cdm.io.specimen.UnitsGatheringArea;
45
import eu.etaxonomy.cdm.io.specimen.UnitsGatheringEvent;
46
import eu.etaxonomy.cdm.model.agent.AgentBase;
47
import eu.etaxonomy.cdm.model.agent.Person;
48
import eu.etaxonomy.cdm.model.common.CdmBase;
49
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
50
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
51
import eu.etaxonomy.cdm.model.common.Language;
52
import eu.etaxonomy.cdm.model.common.TimePeriod;
53
import eu.etaxonomy.cdm.model.description.Feature;
54
import eu.etaxonomy.cdm.model.description.TaxonDescription;
55
import eu.etaxonomy.cdm.model.description.TextData;
56
import eu.etaxonomy.cdm.model.location.NamedArea;
57
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
58
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
59
import eu.etaxonomy.cdm.model.name.NonViralName;
60
import eu.etaxonomy.cdm.model.name.Rank;
61
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
62
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
63
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
64
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
65
import eu.etaxonomy.cdm.model.occurrence.GatheringEvent;
66
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
67
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
68
import eu.etaxonomy.cdm.model.reference.IBook;
69
import eu.etaxonomy.cdm.model.reference.IBookSection;
70
import eu.etaxonomy.cdm.model.reference.Reference;
71
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
72
import eu.etaxonomy.cdm.model.taxon.Classification;
73
import eu.etaxonomy.cdm.model.taxon.Taxon;
74
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
75
import eu.etaxonomy.cdm.persistence.dto.UuidAndTitleCache;
76
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
77
import eu.etaxonomy.cdm.strategy.parser.ParserProblem;
78

    
79

    
80
/**
81
 * @author pkelbert
82
 * @date 2 Apr. 2013
83
 *
84
 */
85
public class TaxonXExtractor {
86

    
87
    protected TaxonXImport importer;
88
    protected TaxonXImportState state2;
89
    private final Map<String,String> namesAsked = new HashMap<String, String>();
90
    private final Map<String,Rank>ranksAsked = new HashMap<String, Rank>();
91

    
92
    Logger logger = Logger.getLogger(TaxonXExtractor.class);
93

    
94
    public class ReferenceBuilder{
95
        private int nbRef=0;
96
        private boolean foundBibref=false;
97
        private final TaxonXAddSources sourceHandler;
98

    
99
        /**
100
         * @param sourceHandler
101
         */
102
        public ReferenceBuilder(TaxonXAddSources sourceHandler) {
103
            this.sourceHandler=sourceHandler;
104
        }
105

    
106
        /**
107
         * @return the foundBibref
108
         */
109
        public boolean isFoundBibref() {
110
            return foundBibref;
111
        }
112

    
113
        /**
114
         * @param foundBibref the foundBibref to set
115
         */
116
        public void setFoundBibref(boolean foundBibref) {
117
            this.foundBibref = foundBibref;
118
        }
119

    
120

    
121
        /**
122
         * @param ref
123
         * @param refMods
124
         */
125
        public void builReference(String mref, String treatmentMainName, NomenclaturalCode nomenclaturalCode,
126
                Taxon acceptedTaxon, Reference refMods) {
127
            // System.out.println("builReference "+mref);
128
            this.setFoundBibref(true);
129

    
130
            String ref= mref;
131
            if ( (ref.endsWith(";") ||ref.endsWith(",")  ) && ((ref.length())>1)) {
132
                ref=ref.substring(0, ref.length()-1)+".";
133
            }
134
            if (ref.startsWith(treatmentMainName) && !ref.endsWith(treatmentMainName)) {
135
                ref=ref.replace(treatmentMainName, "");
136
                ref=ref.trim();
137
                while (ref.startsWith(".") || ref.startsWith(",")) {
138
                    ref=ref.replace(".","").replace(",","").trim();
139
                }
140
            }
141

    
142
            //                        logger.info("Current reference :"+nbRef+", "+ref+", "+treatmentMainName+"--"+ref.indexOf(treatmentMainName));
143
            Reference reference = ReferenceFactory.newGeneric();
144
            reference.setTitleCache(ref, true);
145

    
146
            //only add the first one if there is no nomenclatural reference yet
147
            if (nbRef==0){
148
                if(acceptedTaxon.getName().getNomenclaturalReference()==null){
149
                    acceptedTaxon.getName().setNomenclaturalReference(reference);
150
                    sourceHandler.addSource(refMods, acceptedTaxon);
151
                }
152
            }
153
            //add all other references as Feature.Citation
154
            TaxonDescription taxonDescription =importer.getTaxonDescription(acceptedTaxon, false, true);
155
            acceptedTaxon.addDescription(taxonDescription);
156
            sourceHandler.addSource(refMods, acceptedTaxon);
157

    
158
            TextData textData = TextData.NewInstance(Feature.CITATION());
159
            Language language = Language.DEFAULT();
160
            textData.putText(language, ref);
161
            sourceHandler.addSource(reference, textData,acceptedTaxon.getName(),refMods);
162
            taxonDescription.addElement(textData);
163

    
164
            sourceHandler.addSource(refMods, taxonDescription);
165

    
166
            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
167
            //                        logger.warn("BWAAHHHH: "+nameToBeFilled.getParsingProblems()+", "+ref);
168
            nbRef++;
169

    
170
        }
171

    
172
    }
173

    
174
    public class MySpecimenOrObservation{
175
        String descr="";
176
        DerivedUnit derivedUnitBase=null;
177

    
178
        public String getDescr() {
179
            return descr;
180
        }
181
        public void setDescr(String descr) {
182
            this.descr = descr;
183
        }
184
        public DerivedUnit getDerivedUnitBase() {
185
            return derivedUnitBase;
186
        }
187
        public void setDerivedUnitBase(DerivedUnit derivedUnitBase) {
188
            this.derivedUnitBase = derivedUnitBase;
189
        }
190

    
191

    
192

    
193

    
194
    }
195

    
196
    /**
197
     * @param item
198
     * @return
199
     */
200
    @SuppressWarnings({ "unused", "rawtypes" })
201
    protected MySpecimenOrObservation extractSpecimenOrObservation(Node specimenObservationNode, DerivedUnit derivedUnitBase,
202
            SpecimenOrObservationType defaultAssociation, TaxonNameBase<?,?> typifiableName) {
203
        String country=null;
204
        String locality=null;
205
        String stateprov=null;
206
        String collector=null;
207
        String fieldNumber=null;
208
        Double latitude=null,longitude=null;
209
        TimePeriod tp =null;
210
        String day,month,year="";
211
        String descr="not available";
212
        String type="";
213
        boolean asso=false;
214
        NodeList eventContent =null;
215
        // create facade
216
        DerivedUnitFacade derivedUnitFacade = null;
217

    
218
        UnitsGatheringEvent unitsGatheringEvent;
219
        UnitsGatheringArea unitsGatheringArea;
220
        DefinedTermBase areaCountry;
221

    
222
        MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
223

    
224
        NodeList xmldata= specimenObservationNode.getChildNodes();
225
        for (int n=0;n<xmldata.getLength();n++){
226
            eventContent=xmldata.item(n).getChildNodes();
227
            if (xmldata.item(n).getNodeName().equalsIgnoreCase("tax:xmldata")){
228
                asso=true;
229
                country=null;
230
                locality=null;
231
                stateprov=null;
232
                collector=null;
233
                fieldNumber=null;
234
                latitude=null;
235
                longitude=null;
236
                day="";
237
                month="";
238
                year="";
239
                type="";
240
                for (int j=0;j<eventContent.getLength();j++){
241
                    if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:country")){
242
                        country=eventContent.item(j).getTextContent().trim();
243
                    }
244
                    else if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:locality")){
245
                        locality=eventContent.item(j).getTextContent().trim();
246
                    }
247
                    else  if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:stateprovince")){
248
                        stateprov=eventContent.item(j).getTextContent().trim();
249
                    }
250
                    else  if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:collector")){
251
                        collector=eventContent.item(j).getTextContent().trim();
252
                    }
253
                    else  if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:yearcollected")){
254
                        year=eventContent.item(j).getTextContent().trim();
255
                    }
256
                    else  if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:monthcollected")){
257
                        month=eventContent.item(j).getTextContent().trim();
258
                    }
259
                    else  if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:daycollected")){
260
                        day=eventContent.item(j).getTextContent().trim();
261
                    }
262
                    else  if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:decimallongitude")){
263
                        String tmp = eventContent.item(j).getTextContent().trim();
264
                        try{longitude=Double.valueOf(tmp);}catch(Exception e){logger.warn("longitude is not a number");}
265
                    }
266
                    else  if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:decimallatitude")){
267
                        String tmp = eventContent.item(j).getTextContent().trim();
268
                        try{latitude=Double.valueOf(tmp);}catch(Exception e){logger.warn("latitude is not a number");}
269
                    }else if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:TypeStatus")){
270
                        type = eventContent.item(j).getTextContent().trim();
271
                    }else if(eventContent.item(j).getNodeName().equalsIgnoreCase("#text") && StringUtils.isBlank(eventContent.item(j).getTextContent())){
272
                        //do nothing
273
                    }
274
                    else {
275
                        logger.info("UNEXTRACTED FIELD FOR SPECIMEN "+eventContent.item(j).getNodeName()+", "+eventContent.item(j).getTextContent()) ;
276
                    }
277
                }
278
                if (!day.isEmpty() || !month.isEmpty() || !year.isEmpty()){
279
                    try{
280
                        if (!year.isEmpty()) {
281
                            tp = TimePeriod.NewInstance(Integer.parseInt(year));
282
                            if (!month.isEmpty()) {
283
                                tp.setStartMonth(Integer.parseInt(month));
284
                                if (!day.isEmpty()) {
285
                                    tp.setStartDay(Integer.parseInt(day));
286
                                }
287
                            }
288

    
289
                        }
290
                    }catch(Exception e){
291
                        logger.warn("Collection date error "+e);
292
                    }
293
                }
294
            }
295
            if(xmldata.item(n).getNodeName().equalsIgnoreCase("#text")){
296
                descr=xmldata.item(n).getTextContent().replaceAll(";","").trim();
297
                if (descr.length()>1 && containsDistinctLetters(descr)) {
298
                    specimenOrObservation.setDescr(descr);
299
                    asso=true;
300
                }
301
            }
302
            if(xmldata.item(n).getNodeName().equalsIgnoreCase("tax:p")){
303
                descr=xmldata.item(n).getTextContent().replaceAll(";","").trim();
304
                if (descr.length()>1 && containsDistinctLetters(descr)) {
305
                    specimenOrObservation.setDescr(descr);
306
                    asso=true;
307
                }
308
            }
309
        }
310
        //        if(asso && descr.length()>1){
311

    
312
        //            logger.info("DESCR: "+descr);
313
        if (!type.isEmpty()) {
314
            if (!containsDistinctLetters(type)) {
315
                type="no description text";
316
            }
317
            derivedUnitFacade = getFacade(type.replaceAll(";",""), defaultAssociation);
318
            SpecimenTypeDesignation designation = SpecimenTypeDesignation.NewInstance();
319

    
320
            if (typifiableName != null){
321
            	typifiableName.addTypeDesignation(designation, true);
322
            }else{
323
            	logger.warn("No typifiable name available");
324
            }
325
            SpecimenTypeDesignationStatus stds= getSpecimenTypeDesignationStatusByKey(type);
326
            if (stds !=null) {
327
                stds = (SpecimenTypeDesignationStatus) importer.getTermService().find(stds.getUuid());
328
            }
329

    
330
            designation.setTypeStatus(stds);
331
            derivedUnitFacade.innerDerivedUnit().addSpecimenTypeDesignation(designation);
332

    
333
            derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
334
            // System.out.println("derivedUnitBase: "+derivedUnitBase);
335
            //                designation.setTypeSpecimen(derivedUnitBase);
336
            //                TaxonNameBase<?,?> name = taxon.getName();
337
            //                name.addTypeDesignation(designation, true);
338
        } else {
339
            if (!containsDistinctLetters(descr.replaceAll(";",""))) {
340
                descr="no description text";
341
            }
342

    
343
            derivedUnitFacade = getFacade(descr.replaceAll(";",""), defaultAssociation);
344
            derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
345
            // System.out.println("derivedUnitBase2: "+derivedUnitBase);
346
        }
347

    
348
        unitsGatheringEvent = new UnitsGatheringEvent(importer.getTermService(), locality,collector,longitude, latitude,
349
                state2.getConfig(),importer.getAgentService());
350

    
351
        if(tp!=null) {
352
            unitsGatheringEvent.setGatheringDate(tp);
353
        }
354

    
355
        // country
356
        unitsGatheringArea = new UnitsGatheringArea();
357
        unitsGatheringArea.setParams(null, country, state2.getConfig(), importer.getTermService(), importer.getOccurrenceService());
358
        //TODO other areas
359
        if (StringUtils.isNotBlank(stateprov)){
360
        	Map<String, String> namedAreas = new HashMap<String, String>();
361
        	namedAreas.put(stateprov, null);
362
            unitsGatheringArea.setAreaNames(namedAreas, state2.getConfig(), importer.getTermService(), importer.getVocabularyService());
363
        }
364

    
365
        areaCountry =  unitsGatheringArea.getCountry();
366

    
367
        //                         // other areas
368
        //                         unitsGatheringArea = new UnitsGatheringArea(namedAreaList,dataHolder.getTermService());
369
        //                         ArrayList<DefinedTermBase> nas = unitsGatheringArea.getAreas();
370
        //                         for (DefinedTermBase namedArea : nas) {
371
        //                             unitsGatheringEvent.addArea(namedArea);
372
        //                         }
373

    
374
        // copy gathering event to facade
375
        GatheringEvent gatheringEvent = unitsGatheringEvent.getGatheringEvent();
376
        derivedUnitFacade.setGatheringEvent(gatheringEvent);
377
        derivedUnitFacade.setLocality(gatheringEvent.getLocality());
378
        derivedUnitFacade.setExactLocation(gatheringEvent.getExactLocation());
379
        derivedUnitFacade.setCollector(gatheringEvent.getCollector());
380
        derivedUnitFacade.setCountry((NamedArea)areaCountry);
381

    
382
        for(DefinedTermBase<?> area:unitsGatheringArea.getAreas()){
383
            derivedUnitFacade.addCollectingArea((NamedArea) area);
384
        }
385
        //                         derivedUnitFacade.addCollectingAreas(unitsGatheringArea.getAreas());
386

    
387
        // add fieldNumber
388
        if (fieldNumber != null) {
389
            derivedUnitFacade.setFieldNumber(fieldNumber);
390
        }
391
        specimenOrObservation.setDerivedUnitBase(derivedUnitBase);
392
        //        }
393
        return specimenOrObservation;
394
    }
395

    
396

    
397
    private SpecimenTypeDesignationStatus getSpecimenTypeDesignationStatusByKey(
398
            String key) {
399
        if (key == null) {
400
            return null;
401
        } else if (key.matches("(?i)(T|Type)")) {
402
            return SpecimenTypeDesignationStatus.TYPE();
403
        } else if (key.matches("(?i)(HT|Holotype)")) {
404
            return SpecimenTypeDesignationStatus.HOLOTYPE();
405
        } else if (key.matches("(?i)(LT|Lectotype)")) {
406
            return SpecimenTypeDesignationStatus.LECTOTYPE();
407
        } else if (key.matches("(?i)(NT|Neotype)")) {
408
            return SpecimenTypeDesignationStatus.NEOTYPE();
409
        } else if (key.matches("(?i)(ST|Syntype)")) {
410
            return SpecimenTypeDesignationStatus.SYNTYPE();
411
        } else if (key.matches("(?i)(ET|Epitype)")) {
412
            return SpecimenTypeDesignationStatus.EPITYPE();
413
        } else if (key.matches("(?i)(IT|Isotype)")) {
414
            return SpecimenTypeDesignationStatus.ISOTYPE();
415
        } else if (key.matches("(?i)(ILT|Isolectotype)")) {
416
            return SpecimenTypeDesignationStatus.ISOLECTOTYPE();
417
        } else if (key.matches("(?i)(INT|Isoneotype)")) {
418
            return SpecimenTypeDesignationStatus.ISONEOTYPE();
419
        } else if (key.matches("(?i)(IET|Isoepitype)")) {
420
            return SpecimenTypeDesignationStatus.ISOEPITYPE();
421
        } else if (key.matches("(?i)(PT|Paratype)")) {
422
            return SpecimenTypeDesignationStatus.PARATYPE();
423
        } else if (key.matches("(?i)(PLT|Paralectotype)")) {
424
            return SpecimenTypeDesignationStatus.PARALECTOTYPE();
425
        } else if (key.matches("(?i)(PNT|Paraneotype)")) {
426
            return SpecimenTypeDesignationStatus.PARANEOTYPE();
427
        } else if (key.matches("(?i)(unsp.|Unspecified)")) {
428
            return SpecimenTypeDesignationStatus.UNSPECIFIC();
429
        } else if (key.matches("(?i)(2LT|Second Step Lectotype)")) {
430
            return SpecimenTypeDesignationStatus.SECOND_STEP_LECTOTYPE();
431
        } else if (key.matches("(?i)(2NT|Second Step Neotype)")) {
432
            return SpecimenTypeDesignationStatus.SECOND_STEP_NEOTYPE();
433
        } else if (key.matches("(?i)(OM|Original Material)")) {
434
            return SpecimenTypeDesignationStatus.ORIGINAL_MATERIAL();
435
        } else if (key.matches("(?i)(IcT|Iconotype)")) {
436
            return SpecimenTypeDesignationStatus.ICONOTYPE();
437
        } else if (key.matches("(?i)(PT|Phototype)")) {
438
            return SpecimenTypeDesignationStatus.PHOTOTYPE();
439
        } else if (key.matches("(?i)(IST|Isosyntype)")) {
440
            return SpecimenTypeDesignationStatus.ISOSYNTYPE();
441
        } else {
442
            return null;
443
        }
444
    }
445
    protected DerivedUnitFacade getFacade(String recordBasis, SpecimenOrObservationType defaultAssoc) {
446
        // System.out.println("getFacade() for "+recordBasis+", defaultassociation: "+defaultAssoc);
447
        SpecimenOrObservationType type = null;
448

    
449
        // create specimen
450
        if (recordBasis != null) {
451
            String recordBasisL = recordBasis.toLowerCase();
452
            if (recordBasisL.startsWith("specimen") || recordBasisL.contains("specimen") || recordBasisL.contains("type")) {// specimen
453
                type = SpecimenOrObservationType.PreservedSpecimen;
454
            }
455
            if (recordBasisL.startsWith("observation")) {
456
                type = SpecimenOrObservationType.Observation;
457
            }
458
            if (recordBasisL.contains("fossil")) {
459
                type = SpecimenOrObservationType.Fossil;
460
            }
461

    
462
            if (recordBasisL.startsWith("living")) {
463
                type = SpecimenOrObservationType.LivingSpecimen;
464
            }
465
            if (type == null) {
466
                logger.info("The basis of record does not seem to be known: *" + recordBasisL+"*");
467
                type = defaultAssoc;
468
            }
469
            // TODO fossils?
470
        } else {
471
            logger.info("The basis of record is null");
472
            type = defaultAssoc;
473
        }
474
        DerivedUnitFacade derivedUnitFacade = DerivedUnitFacade.NewInstance(type);
475
        return derivedUnitFacade;
476
    }
477

    
478

    
479

    
480
    @SuppressWarnings("rawtypes")
481
    protected Feature makeFeature(SpecimenOrObservationBase unit) {
482
        if (unit == null){
483
            return null;
484
        }
485
        SpecimenOrObservationType type = unit.getRecordBasis();
486

    
487
        if (type.isFeatureObservation()){
488
            return Feature.OBSERVATION();
489
        }else if (type.isPreservedSpecimen() ||
490
                type == SpecimenOrObservationType.LivingSpecimen ||
491
                type == SpecimenOrObservationType.OtherSpecimen
492
                ){
493
            return Feature.SPECIMEN();
494
        }else if (type == SpecimenOrObservationType.Unknown ||
495
                type == SpecimenOrObservationType.DerivedUnit
496
                ) {
497
            return Feature.INDIVIDUALS_ASSOCIATION();
498
        }
499
        logger.warn("No feature defined for derived unit class: "
500
                + unit.getClass().getSimpleName());
501
        return null;
502
    }
503

    
504

    
505
    /** Separator string (a single comma); NOTE(review): its usages are outside the visible part of this file. */
    protected final static String SPLITTER = ",";
506

    
507

    
508
    protected  int askQuestion(String question){
509
        Scanner scan = new Scanner(System.in);
510
        logger.info(question);
511
        int index = scan.nextInt();
512
        return index;
513
    }
514

    
515

    
516
    /**
517
     * @param reftype
518
     * @return
519
     */
520
    protected Reference getReferenceWithType(int reftype) {
521
        Reference ref = null;
522
        switch (reftype) {
523
        case 1:
524
            ref = ReferenceFactory.newGeneric();
525
            break;
526
        case 2:
527
            IBook tmp= ReferenceFactory.newBook();
528
            ref = (Reference)tmp;
529
            break;
530
        case 3:
531
            ref = ReferenceFactory.newArticle();
532
            break;
533
        case 4:
534
            IBookSection tmp2 = ReferenceFactory.newBookSection();
535
            ref = (Reference)tmp2;
536
            break;
537
        case 5:
538
            ref = ReferenceFactory.newJournal();
539
            break;
540
        case 6:
541
            ref = ReferenceFactory.newPrintSeries();
542
            break;
543
        case 7:
544
            ref = ReferenceFactory.newThesis();
545
            break;
546
        default:
547
            break;
548
        }
549
        return ref;
550
    }
551
    /**
552
     * @param unitsList
553
     * @param state
554
     */
555
    protected void prepareCollectors(TaxonXImportState state,IAgentService agentService) {
556
        //        logger.info("PREPARE COLLECTORS");
557
        List<String> collectors = new ArrayList<String>();
558
        String tmp;
559
        List<String> collectorsU = new ArrayList<String>(new HashSet<String>(collectors));
560
        Set<UUID> uuids = new HashSet<UUID>();
561

    
562
        //existing persons in DB
563
        List<UuidAndTitleCache<Person>> hiberPersons = agentService.getPersonUuidAndTitleCache();
564
        Map<String,Person> titleCachePerson = new HashMap<String, Person>();
565
        uuids = new HashSet<UUID>();
566
        for (UuidAndTitleCache<Person> hibernateP:hiberPersons){
567
            uuids.add(hibernateP.getUuid());
568
        }
569

    
570
        if (!uuids.isEmpty()){
571
            List<AgentBase> existingPersons = agentService.find(uuids);
572
            for (AgentBase existingP:existingPersons){
573
                titleCachePerson.put(existingP.getTitleCache(),CdmBase.deproxy(existingP, Person.class));
574
            }
575
        }
576

    
577
        Map<String,UUID> personMap = new HashMap<String, UUID>();
578
        for (UuidAndTitleCache<Person> person:hiberPersons){
579
            personMap.put(person.getTitleCache(), person.getUuid());
580
        }
581

    
582
        java.util.Collection<AgentBase> personToadd = new ArrayList<AgentBase>();
583

    
584
        for (String collector:collectorsU){
585
            Person p = Person.NewInstance();
586
            p.setTitleCache(collector,true);
587
            if (!personMap.containsKey(p.getTitleCache())){
588
                personToadd.add(p);
589
            }
590
        }
591

    
592
        if(!personToadd.isEmpty()){
593
            Map<UUID, AgentBase> uuuidPerson = agentService.save(personToadd);
594
            for (UUID u:uuuidPerson.keySet()){
595
                titleCachePerson.put(uuuidPerson.get(u).getTitleCache(), CdmBase.deproxy(uuuidPerson.get(u), Person.class));
596
            }
597
        }
598

    
599
        state.getConfig().setPersons(titleCachePerson);
600
    }
601

    
602
    /**
603
     * @param name
604
     * @return
605
     */
606
    protected String getFullReference(String name, List<ParserProblem> problems) {
607
        //        logger.info("getFullReference for "+ name);
608
        JTextArea textArea = new JTextArea("Complete the reference or the name '"+name+"'.\nThe current problem is "+StringUtils.join(problems,"--"));
609
        JScrollPane scrollPane = new JScrollPane(textArea);
610
        textArea.setLineWrap(true);
611
        textArea.setWrapStyleWord(true);
612
        scrollPane.setPreferredSize( new Dimension( 700, 70 ) );
613

    
614
        //        JFrame frame = new JFrame("I have a question");
615
        //        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
616
        String s = (String)JOptionPane.showInputDialog(
617
                null,
618
                scrollPane,
619
                "Get full reference or name",
620
                JOptionPane.PLAIN_MESSAGE,
621
                null,
622
                null,
623
                name);
624
        return s;
625
    }
626

    
627

    
628

    
629
    /**
630
     * @param name
631
     * @return
632
     * @throws TransformerException
633
     * @throws TransformerFactoryConfigurationError
634
     */
635
    protected String askWhichScientificName(String fullname,String atomised,String classificationName, Node fullParagraph) throws TransformerFactoryConfigurationError, TransformerException {
636
        //        logger.info("getScientificName for "+ fullname);
637
        //        JFrame frame = new JFrame("I have a question");
638
        //        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
639
        String k = fullname+"_"+atomised;
640

    
641
        String defaultN = "";
642
        if (atomised.length()>fullname.length()) {
643
            defaultN=atomised;
644
        } else {
645
            defaultN=fullname;
646
        }
647

    
648
        if (namesAsked.containsKey(k)){
649
            return namesAsked.get(k);
650
        }
651
        else{
652
            //activate it for ants because a lot of markup is incomplete
653
            if (classificationName.indexOf("Ants")>-1) {
654
                return defaultN;
655
            }
656

    
657
            JTextArea textArea = new JTextArea("The names in the free text and in the xml tags do not match : "+fullname+
658
                    ", or "+atomised+"\n"+formatNode(fullParagraph));
659
            JScrollPane scrollPane = new JScrollPane(textArea);
660
            textArea.setLineWrap(true);
661
            textArea.setWrapStyleWord(true);
662
            scrollPane.setPreferredSize( new Dimension( 700, 200 ) );
663
            String s = (String)JOptionPane.showInputDialog(
664
                    null,
665
                    scrollPane,
666
                    "Which name do I have to use? The current classification is "+classificationName,
667
                    JOptionPane.PLAIN_MESSAGE,
668
                    null,
669
                    null,
670
                    defaultN);
671
            namesAsked.put(k, s);
672
            return s;
673
        }
674
    }
675

    
676

    
677
    protected int askAddParent(String s){
678
        //        boolean hack=true;
679
        //        if (hack) {
680
        //            return 1;
681
        //        }
682
        JTextArea textArea = new JTextArea("If you want to add a parent taxa for "+s+", click \"Yes\"." +
683
                " If it is a root for this classification, click \"No\" or \"Cancel\".");
684
        JScrollPane scrollPane = new JScrollPane(textArea);
685
        textArea.setLineWrap(true);
686
        textArea.setWrapStyleWord(true);
687
        scrollPane.setPreferredSize( new Dimension( 600, 70 ) );
688

    
689
        Object[] options = { UIManager.getString("OptionPane.yesButtonText"),
690
                UIManager.getString("OptionPane.noButtonText")};
691

    
692

    
693
        int addTaxon = JOptionPane.showOptionDialog(null,
694
                scrollPane,
695
                "",
696
                JOptionPane.YES_NO_OPTION,
697
                0,
698
                null,
699
                options,
700
                options[1]);
701
        return addTaxon;
702
    }
703

    
704
    protected String askSetParent(String s){
705
        JTextArea textArea =  new JTextArea("What is the first taxon parent for "+s+"?\n"+
706
                "The rank will be asked later. ");
707
        JScrollPane scrollPane = new JScrollPane(textArea);
708
        textArea.setLineWrap(true);
709
        textArea.setWrapStyleWord(true);
710
        scrollPane.setPreferredSize( new Dimension( 700, 200 ) );
711

    
712
        String s2 = (String)JOptionPane.showInputDialog(
713
                null,
714
                scrollPane,
715
                "",
716
                JOptionPane.PLAIN_MESSAGE,
717
                null,
718
                null,
719
                s);
720
        return s2;
721
    }
722

    
723
    protected String askRank(String s, List<String> rankListStr){
724
        JTextArea  textArea = new JTextArea("What is the rank for "+s+"?");
725
        JScrollPane scrollPane = new JScrollPane(textArea);
726
        textArea.setLineWrap(true);
727
        textArea.setWrapStyleWord(true);
728
        scrollPane.setPreferredSize( new Dimension( 700, 200 ) );
729

    
730
        String r = (String)JOptionPane.showInputDialog(
731
                null,
732
                scrollPane,
733
                "",
734
                JOptionPane.PLAIN_MESSAGE,
735
                null,
736
                rankListStr.toArray(),
737
                null);
738
        return r;
739
    }
740

    
741
    /**
742
     * @param name
743
     * @return
744
     * @throws TransformerException
745
     * @throws TransformerFactoryConfigurationError
746
     */
747
    protected String askFeatureName(String paragraph){
748
        //        logger.info("getScientificName for "+ fullname);
749
        //        JFrame frame = new JFrame("I have a question");
750
        //        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
751
        JTextArea textArea = new JTextArea("How should the feature be named? \n"+paragraph);
752
        JScrollPane scrollPane = new JScrollPane(textArea);
753
        textArea.setLineWrap(true);
754
        textArea.setWrapStyleWord(true);
755
        scrollPane.setPreferredSize( new Dimension( 700, 200 ) );
756
        String s = (String)JOptionPane.showInputDialog(
757
                null,
758
                scrollPane,
759
                "",
760
                JOptionPane.PLAIN_MESSAGE,
761
                null,
762
                null,
763
                "Other");
764
        return s;
765
    }
766

    
767
    /**
768
     * @param taxonnamebase2
769
     * @param bestMatchingTaxon
770
     * @param refMods
771
     * @param similarityAuthor
772
     * @return
773
     */
774
    protected boolean askIfReuseBestMatchingTaxon(NonViralName<?> taxonnamebase2, Taxon bestMatchingTaxon, Reference refMods, double similarityScore, double similarityAuthor) {
775
        Object[] options = { UIManager.getString("OptionPane.yesButtonText"),
776
                UIManager.getString("OptionPane.noButtonText")};
777

    
778
        if (similarityScore<0.66 &&  similarityAuthor<0.5) {
779
            return false;
780
            //            System.out.println("should say NO");
781
        }
782

    
783
        boolean sameSource=false;
784
        boolean noRef=false;
785

    
786
        String sec = refMods.getTitleCache();
787
        String secBest = "";
788
        try{
789
            secBest=bestMatchingTaxon.getSec().getTitleCache();
790
        }
791
        catch(NullPointerException e){
792
            logger.warn("no sec - ignore");
793
        }
794

    
795
        if (secBest.isEmpty()) {
796
            noRef=true;
797
        }
798

    
799
        Object defaultOption=options[1];
800
        if(sec.equalsIgnoreCase(secBest)
801
                //                ||                taxonnamebase2.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(bestMatchingTaxon.getTitleCache().split("sec.")[0].trim())
802
                ) {
803
            //System.out.println(sec+" and "+secBest);
804
            sameSource=true;
805
            //-1 <=> no author
806
            if (similarityScore>0.65 && (similarityAuthor==-1 || similarityAuthor>0.8)) {
807
                defaultOption=options[0];
808
            } else {
809
                defaultOption=options[1];
810
            }
811
        } else {
812
            if (similarityScore>0.65 && similarityAuthor>0.8) {
813
                if(similarityScore==1 ) {
814
                    return true;
815
                }
816
                defaultOption=options[0];
817
            } else {
818
                defaultOption=options[1];
819
            }
820
        }
821

    
822
        String sourcesStr="";
823

    
824
        Set<IdentifiableSource> sources = bestMatchingTaxon.getSources();
825
        for (IdentifiableSource src:sources){
826
            try{
827
                String srcSec=src.getCitation().getTitleCache();
828
                if(!srcSec.isEmpty()){
829
                    sourcesStr+="\n "+srcSec;
830
                    if (srcSec.equalsIgnoreCase(sec)){
831
                        sameSource=true;
832
                        if (similarityScore>0.65 && similarityAuthor>0.8) {
833
                            defaultOption=options[0];
834
                        } else {
835
                            defaultOption=options[1];
836
                        }
837
                    }
838
                }
839
            }catch(Exception e){
840
                logger.warn("the source reference is maybe null, just ignore it.");
841
            }
842
        }
843

    
844
        if (sameSource && similarityScore>0.9999 && (similarityAuthor==-1 || similarityAuthor>0.8)) {
845
            return true;
846
        }
847
        if(similarityScore<0.66) {
848
            defaultOption=options[1];
849
        }
850

    
851
        //        //only activate it if you know the data you are importing (ok for Chenopodium)
852
        if(defaultOption==options[1]) {
853
            return false;
854
        }
855

    
856
        JTextArea textArea =null;
857
        if (!sourcesStr.isEmpty()) {
858
            textArea = new JTextArea("Does "+taxonnamebase2.toString()+" correspond to "
859
                    + bestMatchingTaxon.toString()+" ?\n Click \"Yes\". if it does, click \"No\" if it does not."
860
                    + "\n The current sources are:"+ sourcesStr);
861
        } else {
862
            textArea = new JTextArea("Does "+taxonnamebase2.toString()+" correspond to "
863
                    + bestMatchingTaxon.toString()+" ?\n Click \"Yes\". if it does, click \"No\" if it does not.");
864
        }
865
        JScrollPane scrollPane = new JScrollPane(textArea);
866
        textArea.setLineWrap(true);
867
        textArea.setWrapStyleWord(true);
868
        scrollPane.setPreferredSize( new Dimension( 600, 70 ) );
869

    
870
        int addTaxon = JOptionPane.showOptionDialog(null,
871
                scrollPane,
872
                refMods.toString(),
873
                JOptionPane.YES_NO_OPTION,
874
                0,
875
                null,
876
                options,
877
                defaultOption);
878
        if(addTaxon==1) {
879
            return false;
880
        } else {
881
            return true;
882
        }
883
    }
884

    
885
    /**
886
     * @param fullLineRefName
887
     * @return
888
     */
889
    protected int askIfNameContained(String fullLineRefName) {
890

    
891
        JTextArea textArea = new JTextArea("Is a scientific name contained in this sentence ? Type 0 if contains a name, 1 if it's only a reference. Press 2 if it's to be ignored \n"+fullLineRefName);
892
        JScrollPane scrollPane = new JScrollPane(textArea);
893
        textArea.setLineWrap(true);
894
        textArea.setWrapStyleWord(true);
895
        scrollPane.setPreferredSize( new Dimension( 600, 400 ) );
896

    
897
        String s = (String)JOptionPane.showInputDialog(
898
                null,
899
                scrollPane,
900
                "",
901
                JOptionPane.PLAIN_MESSAGE,
902
                null,
903
                null,
904
                "0");
905
        return Integer.valueOf(s);
906
    }
907

    
908

    
909
    /**
910
     * @param name
911
     * @return
912
     */
913
    protected Rank askForRank(String fullname,Rank rank, NomenclaturalCode nomenclaturalCode) {
914
        //        logger.info("askForRank for "+ fullname+ ", "+rank);
915
        //        JFrame frame = new JFrame("I have a question");
916
        //        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
917

    
918
        if (ranksAsked.containsKey(fullname)){
919
            return ranksAsked.get(fullname);
920
        }
921
        else{
922
            boolean np=false;
923
            int npi=0;
924
            Rank cR = null;
925

    
926
            while (!np && npi<2)
927
            {
928

    
929

    
930
                JTextArea textArea = new JTextArea("What is the correct rank for "+fullname+"?");
931
                JScrollPane scrollPane = new JScrollPane(textArea);
932
                textArea.setLineWrap(true);
933
                textArea.setWrapStyleWord(true);
934
                scrollPane.setPreferredSize( new Dimension( 600, 50 ) );
935

    
936
                List<Rank> rankList = new ArrayList<Rank>();
937
                rankList = importer.getTermService().list(Rank.class, null, null, null, null);
938

    
939
                List<String> rankListStr = new ArrayList<String>();
940
                for (Rank r:rankList) {
941
                    rankListStr.add(r.toString());
942
                }
943
                String s = (String)JOptionPane.showInputDialog(
944
                        null,
945
                        scrollPane,
946
                        "The rank extracted from the TaxonX file is "+rank.toString(),
947
                        JOptionPane.PLAIN_MESSAGE,
948
                        null,
949
                        rankListStr.toArray(),
950
                        rank.toString());
951

    
952

    
953
                try {
954
                    npi++;
955
                    cR = Rank.getRankByEnglishName(s,nomenclaturalCode,true);
956
                    np=true;
957
                } catch (UnknownCdmTypeException e) {
958
                    logger.warn("Unknown rank ?!"+s);
959
                    logger.warn(e);
960
                }
961
            }
962
            ranksAsked.put(fullname,cR);
963
            return cR;
964

    
965
        }
966
    }
967

    
968
    /**
969
     * ask user to specify what kind of paragraph the current "multiple" section is
970
     * default possibilities are "synonyms","material examined","distribution","image caption","other"
971
     * could make sense to replace this list with the CDM-Feature list
972
     * if "other" is selected, a second pop-up will be prompted to ask user to specify a new Feature name.
973
     * @param fullParagraph : the current Node
974
     * @return the section name
975
     * */
976
    protected String askMultiple(Node fullParagraph){
977
        String fp = "";
978
        try {
979
            fp = formatNode(fullParagraph);
980
        } catch (TransformerFactoryConfigurationError e1) {
981
            // TODO Auto-generated catch block
982
            e1.printStackTrace();
983
        } catch (TransformerException e1) {
984
            // TODO Auto-generated catch block
985
            e1.printStackTrace();
986
        }
987
        JTextArea textArea = new JTextArea("What category is it for this paragraph \n"+fp);
988
        JScrollPane scrollPane = new JScrollPane(textArea);
989
        textArea.setLineWrap(true);
990
        textArea.setWrapStyleWord(true);
991
        scrollPane.setPreferredSize( new Dimension( 600, 400 ) );
992

    
993
        String[] possiblities = {"synonyms","material examined","distribution","image caption","Other","vernacular name","type status","new category"};
994

    
995

    
996
        String s = (String)JOptionPane.showInputDialog(
997
                null,
998
                scrollPane,
999
                "",
1000
                JOptionPane.PLAIN_MESSAGE,
1001
                null,
1002
                possiblities,
1003
                "Other");
1004

    
1005
        if (s.equalsIgnoreCase("new category")) {
1006
            try {
1007
                s=askFeatureName(formatNode(fullParagraph));
1008
            } catch (TransformerFactoryConfigurationError e) {
1009
                logger.warn(e);
1010
            } catch (TransformerException e) {
1011
                logger.warn(e);
1012
            }
1013
        }
1014
        return s;
1015

    
1016
    }
1017

    
1018

    
1019

    
1020
    /**
1021
     * asks for the hierarchical parent, based on the current classification
1022
     * @param taxon
1023
     * @param classification
1024
     * @return Taxon, the parent Taxon
1025
     */
1026
    protected Taxon askParent(Taxon taxon,Classification classification ) {
1027
        // System.out.println("ASK PARENT "+classification);
1028
        //        logger.info("ask Parent "+taxon.getTitleCache());
1029
        Set<TaxonNode> allNodes = classification.getAllNodes();
1030
        Map<String,Taxon> nodesMap = new HashMap<String, Taxon>();
1031

    
1032
        for (TaxonNode tn:allNodes){
1033
            Taxon t = tn.getTaxon();
1034
            nodesMap.put(t.getTitleCache(), t);
1035
        }
1036
        List<String> nodeList = new ArrayList<String>();
1037
        for (String nl : nodesMap.keySet()) {
1038
            nodeList.add(nl+" - "+nodesMap.get(nl).getName().getRank());
1039
        }
1040
        Collections.sort(nodeList);
1041
        nodeList.add(0, "Not here!");
1042

    
1043
        JFrame frame = new JFrame("I have a question");
1044
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
1045
        String s = (String)JOptionPane.showInputDialog(
1046
                frame,
1047
                "What is the taxon parent for "+taxon.getTitleCache()+"?",
1048
                "The current classification is "+classification.getTitleCache(),
1049
                JOptionPane.PLAIN_MESSAGE,
1050
                null,
1051
                nodeList.toArray(),
1052
                "Not here!");
1053

    
1054
        Taxon returnTaxon = nodesMap.get(s.split(" - ")[0]);
1055
        //        logger.info("ask Parent returns "+s);
1056
        return returnTaxon;
1057
    }
1058

    
1059

    
1060
    /**
1061
     *
1062
     * @param r: the rank as string (with dwc tags)
1063
     * @return Rank : the Rank object corresponding to the current string
1064
     *
1065
     */
1066
    protected Rank getRank(String r){
1067
        if (r==null) {
1068
            r=Rank.UNKNOWN_RANK().toString();
1069
        }
1070
        r=r.replace("dwcranks:", "");
1071
        r =r.replace("dwc:","");
1072

    
1073
        Rank rank = Rank.UNKNOWN_RANK();
1074
        if (r.equalsIgnoreCase("Superfamily")) {
1075
            rank=Rank.SUPERFAMILY();
1076
        }
1077
        else if (r.equalsIgnoreCase("Family")) {
1078
            rank=Rank.FAMILY();
1079
        }
1080
        else if (r.equalsIgnoreCase("Subfamily")) {
1081
            rank=Rank.SUBFAMILY();
1082
        }
1083
        else if (r.equalsIgnoreCase("Tribe")) {
1084
            rank=Rank.TRIBE();
1085
        }
1086
        else if (r.equalsIgnoreCase("Subtribe")) {
1087
            rank=Rank.SUBTRIBE();
1088
        }
1089
        else if (r.equalsIgnoreCase("Genus")) {
1090
            rank=Rank.GENUS();
1091
        }
1092
        else if (r.equalsIgnoreCase("Subgenus")) {
1093
            rank=Rank.SUBGENUS();
1094
        }
1095
        else if (r.equalsIgnoreCase("Section")) {
1096
            rank=Rank.SECTION_BOTANY();
1097
        }
1098
        else if (r.equalsIgnoreCase("Subsection")) {
1099
            rank=Rank.SUBSECTION_BOTANY();
1100
        }
1101
        else if (r.equalsIgnoreCase("Series")) {
1102
            rank=Rank.SERIES();
1103
        }
1104
        else if (r.equalsIgnoreCase("Subseries")) {
1105
            rank=Rank.SUBSERIES();
1106
        }
1107
        else if (r.equalsIgnoreCase("Species")) {
1108
            rank=Rank.SPECIES();
1109
        }
1110
        else if (r.equalsIgnoreCase("Subspecies")) {
1111
            rank=Rank.SUBSPECIES();
1112
        }
1113
        else if (r.equalsIgnoreCase("Variety") || r.equalsIgnoreCase("varietyEpithet")) {
1114
            rank=Rank.VARIETY();
1115
        }
1116
        else if (r.equalsIgnoreCase("Subvariety")) {
1117
            rank=Rank.SUBVARIETY();
1118
        }
1119
        else if (r.equalsIgnoreCase("Form")) {
1120
            rank=Rank.FORM();
1121
        }
1122
        else if (r.equalsIgnoreCase("Subform")) {
1123
            rank=Rank.SUBFORM();
1124
        }else if (r.equalsIgnoreCase("higher")) {
1125
//            rank=Rank.SUPRAGENERICTAXON();
1126
        	logger.warn("handling of 'higher' rank still unclear");
1127
        }
1128

    
1129
        return rank;
1130
    }
1131

    
1132

    
1133
    /**
1134
     * @param ato: atomised taxon name data
1135
     * @return rank present in the xmldata fields
1136
     */
1137
    protected Rank getRank(Map<String, String> ato) {
1138
        Rank rank=Rank.UNKNOWN_RANK();
1139

    
1140
        if (ato == null) {
1141
            return rank;
1142
        }
1143
        if (ato.containsKey("dwc:family")){
1144
            rank=Rank.FAMILY();
1145
        }
1146
        if (ato.containsKey("dwc:tribe") || ato.containsKey("dwcranks:tribe")){
1147
            rank=Rank.TRIBE();
1148
        }
1149
        if (ato.containsKey("dwc:genus")) {
1150
            rank= Rank.GENUS();
1151
        }
1152
        if (ato.containsKey("dwc:subgenus")) {
1153
            rank= Rank.SUBGENUS();
1154
        }
1155
        if (ato.containsKey("dwc:specificepithet") || ato.containsKey("dwc:species")) {
1156
            rank= Rank.SPECIES();
1157
        }
1158
        if (ato.containsKey("dwc:infraspecificepithet")) {
1159
            rank= Rank.INFRASPECIES();
1160
        }
1161
        if (ato.containsKey("dwcranks:varietyepithet")) {
1162
            rank=Rank.VARIETY();
1163
        }
1164
        //popUp(rank.getTitleCache());
1165
        return rank;
1166
    }
1167

    
1168
    /**
1169
     * Format a XML node for a clean (screen) output with tags
1170
     * @param Node : the node to format
1171
     * @return String : the XML section formated for a screen output
1172
     * */
1173

    
1174
    protected String formatNode(Node node) throws TransformerFactoryConfigurationError, TransformerException{
1175
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
1176
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
1177
        //initialize StreamResult with File object to save to file
1178
        StreamResult result = new StreamResult(new StringWriter());
1179
        DOMSource source = new DOMSource(node);
1180
        transformer.transform(source, result);
1181
        String xmlString = result.getWriter().toString();
1182
        return xmlString;
1183
    }
1184

    
1185
    protected boolean containsDistinctLetters(String word){
1186
        Set<Character> dl = new HashSet<Character>();
1187
        for (char a: word.toCharArray()) {
1188
            dl.add(a);
1189
        }
1190
        if(dl.size()>1 && word.indexOf("no description text")==-1) {
1191
            return true;
1192
        } else {
1193
            return false;
1194
        }
1195
    }
1196

    
1197
    /**
1198
     * Tries to match the status string against any new name status
1199
     * and returns the status if it matches. Returns <code>null</code> otherwise.
1200
     * @param status
1201
     * @return
1202
     */
1203
    protected String newNameStatus(String status){
1204
    	String pattern = "(" + "((sp|spec|gen|comb|)\\.\\s*nov.)" +
1205
    				"|(new\\s*(species|combination))" +
1206
    				"|(n\\.\\s*sp\\.)" +
1207
    				"|(sp\\.\\s*n\\.)" +
1208
    				")";
1209
    	if (status.trim().matches(pattern)){
1210
    		//FIXME
1211
    		return null;
1212
//    		return status;
1213
    	}else{
1214
    		return null;
1215
    	}
1216
    }
1217

    
1218

    
1219
    /** Creates an cdm-NomenclaturalCode by the tcs NomenclaturalCode
1220
     */
1221
    protected NomenclaturalStatusType nomStatusString2NomStatus (String nomStatus) throws UnknownCdmTypeException{
1222

    
1223
        if (nomStatus == null){ return null;
1224
        }else if ("Valid".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.VALID();
1225

    
1226
        }else if ("Alternative".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.ALTERNATIVE();
1227
        }else if ("nom. altern.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.ALTERNATIVE();
1228

    
1229
        }else if ("Ambiguous".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.AMBIGUOUS();
1230

    
1231
        }else if ("Doubtful".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.DOUBTFUL();
1232

    
1233
        }else if ("Confusum".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.CONFUSUM();
1234

    
1235
        }else if ("Illegitimate".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.ILLEGITIMATE();
1236
        }else if ("nom. illeg.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.ILLEGITIMATE();
1237

    
1238
        }else if ("Superfluous".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.SUPERFLUOUS();
1239
        }else if ("nom. superfl.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.SUPERFLUOUS();
1240

    
1241
        }else if ("Rejected".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.REJECTED();
1242
        }else if ("nom. rej.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.REJECTED();
1243

    
1244
        }else if ("Utique Rejected".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.UTIQUE_REJECTED();
1245

    
1246
        }else if ("Conserved Prop".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.CONSERVED_PROP();
1247

    
1248
        }else if ("Orthography Conserved Prop".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.ORTHOGRAPHY_CONSERVED_PROP();
1249

    
1250
        }else if ("Legitimate".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.LEGITIMATE();
1251

    
1252
        }else if ("Novum".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.NOVUM();
1253
        }else if ("nom. nov.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.NOVUM();
1254

    
1255
        }else if ("Utique Rejected Prop".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.UTIQUE_REJECTED_PROP();
1256

    
1257
        }else if ("Orthography Conserved".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.ORTHOGRAPHY_CONSERVED();
1258

    
1259
        }else if ("Rejected Prop".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.REJECTED_PROP();
1260

    
1261
        }else if ("Conserved".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.CONSERVED();
1262
        }else if ("nom. cons.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.CONSERVED();
1263

    
1264
        }else if ("Sanctioned".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.SANCTIONED();
1265

    
1266
        }else if ("Invalid".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.INVALID();
1267
        }else if ("nom. inval.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.INVALID();
1268

    
1269
        }else if ("Nudum".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.NUDUM();
1270
        }else if ("nom. nud.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.NUDUM();
1271

    
1272
        }else if ("Combination Invalid".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.COMBINATION_INVALID();
1273

    
1274
        }else if ("Provisional".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.PROVISIONAL();
1275
        }else if ("nom. provis.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.PROVISIONAL();
1276
        }
1277
        else {
1278
            throw new UnknownCdmTypeException("Unknown Nomenclatural status type " + nomStatus);
1279
        }
1280
    }
1281

    
1282

    
1283
    //TypeDesignation
1284
    protected  SpecimenTypeDesignationStatus typeStatusId2TypeStatus (int typeStatusId)  throws UnknownCdmTypeException{
1285
        switch (typeStatusId){
1286
        case 0: return null;
1287
        case 1: return SpecimenTypeDesignationStatus.HOLOTYPE();
1288
        case 2: return SpecimenTypeDesignationStatus.LECTOTYPE();
1289
        case 3: return SpecimenTypeDesignationStatus.NEOTYPE();
1290
        case 4: return SpecimenTypeDesignationStatus.EPITYPE();
1291
        case 5: return SpecimenTypeDesignationStatus.ISOLECTOTYPE();
1292
        case 6: return SpecimenTypeDesignationStatus.ISONEOTYPE();
1293
        case 7: return SpecimenTypeDesignationStatus.ISOTYPE();
1294
        case 8: return SpecimenTypeDesignationStatus.PARANEOTYPE();
1295
        case 9: return SpecimenTypeDesignationStatus.PARATYPE();
1296
        case 10: return SpecimenTypeDesignationStatus.SECOND_STEP_LECTOTYPE();
1297
        case 11: return SpecimenTypeDesignationStatus.SECOND_STEP_NEOTYPE();
1298
        case 12: return SpecimenTypeDesignationStatus.SYNTYPE();
1299
        case 21: return SpecimenTypeDesignationStatus.ICONOTYPE();
1300
        case 22: return SpecimenTypeDesignationStatus.PHOTOTYPE();
1301
        default: {
1302
            throw new UnknownCdmTypeException("Unknown TypeDesignationStatus (id=" + Integer.valueOf(typeStatusId).toString() + ")");
1303
        }
1304
        }
1305
    }
1306

    
1307

    
1308
}
1309

    
1310

    
(3-3/9)