Project

General

Profile

Download (50.6 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
 * Copyright (C) 2013 EDIT
3
 * European Distributed Institute of Taxonomy
4
 * http://www.e-taxonomy.eu
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9
package eu.etaxonomy.cdm.io.taxonx2013;
10

    
11
import java.awt.Dimension;
12
import java.io.StringWriter;
13
import java.util.ArrayList;
14
import java.util.Collections;
15
import java.util.HashMap;
16
import java.util.HashSet;
17
import java.util.List;
18
import java.util.Map;
19
import java.util.Scanner;
20
import java.util.Set;
21
import java.util.UUID;
22

    
23
import javax.swing.JFrame;
24
import javax.swing.JOptionPane;
25
import javax.swing.JScrollPane;
26
import javax.swing.JTextArea;
27
import javax.swing.UIManager;
28
import javax.xml.transform.OutputKeys;
29
import javax.xml.transform.Transformer;
30
import javax.xml.transform.TransformerException;
31
import javax.xml.transform.TransformerFactory;
32
import javax.xml.transform.TransformerFactoryConfigurationError;
33
import javax.xml.transform.dom.DOMSource;
34
import javax.xml.transform.stream.StreamResult;
35

    
36
import org.apache.commons.lang.StringUtils;
37
import org.apache.log4j.Logger;
38
import org.w3c.dom.Node;
39
import org.w3c.dom.NodeList;
40

    
41
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
42
import eu.etaxonomy.cdm.api.service.IAgentService;
43
import eu.etaxonomy.cdm.io.specimen.UnitsGatheringArea;
44
import eu.etaxonomy.cdm.io.specimen.UnitsGatheringEvent;
45
import eu.etaxonomy.cdm.model.agent.AgentBase;
46
import eu.etaxonomy.cdm.model.agent.Person;
47
import eu.etaxonomy.cdm.model.common.CdmBase;
48
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
49
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
50
import eu.etaxonomy.cdm.model.common.Language;
51
import eu.etaxonomy.cdm.model.common.TimePeriod;
52
import eu.etaxonomy.cdm.model.description.Feature;
53
import eu.etaxonomy.cdm.model.description.TaxonDescription;
54
import eu.etaxonomy.cdm.model.description.TextData;
55
import eu.etaxonomy.cdm.model.location.NamedArea;
56
import eu.etaxonomy.cdm.model.name.INonViralName;
57
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
58
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
59
import eu.etaxonomy.cdm.model.name.Rank;
60
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
61
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
62
import eu.etaxonomy.cdm.model.name.TaxonName;
63
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
64
import eu.etaxonomy.cdm.model.occurrence.GatheringEvent;
65
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
66
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
67
import eu.etaxonomy.cdm.model.reference.IBook;
68
import eu.etaxonomy.cdm.model.reference.IBookSection;
69
import eu.etaxonomy.cdm.model.reference.Reference;
70
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
71
import eu.etaxonomy.cdm.model.taxon.Classification;
72
import eu.etaxonomy.cdm.model.taxon.Taxon;
73
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
74
import eu.etaxonomy.cdm.persistence.dto.UuidAndTitleCache;
75
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
76
import eu.etaxonomy.cdm.strategy.parser.ParserProblem;
77

    
78

    
79
/**
80
 * @author pkelbert
81
 * @since 2 Apr. 2013
82
 *
83
 */
84
public class TaxonXExtractor {
85

    
86
    /** Importer used by this class to reach the CDM services (e.g. getTermService(), getTaxonService()) and taxon descriptions. */
    protected TaxonXImport importer;
87
    /** Import state; its getConfig() is handed to the gathering-event and gathering-area helpers. */
    protected TaxonXImportState state2;
88
    /** Answers already given in askWhichScientificName, keyed by fullname + "_" + atomised, so the same mismatch is asked only once. */
    private final Map<String,String> namesAsked = new HashMap<String, String>();
89
    /** Ranks keyed by a string; not used in the part of the class visible here - presumably a cache of ranks already asked for (TODO confirm). */
    private final Map<String,Rank>ranksAsked = new HashMap<String, Rank>();
90

    
91
    /** log4j logger for this class (package-private instance field). */
    Logger logger = Logger.getLogger(TaxonXExtractor.class);
92

    
93
    public class ReferenceBuilder{
94
        private int nbRef=0;
95
        private boolean foundBibref=false;
96
        private final TaxonXAddSources sourceHandler;
97

    
98
        /**
99
         * @param sourceHandler
100
         */
101
        public ReferenceBuilder(TaxonXAddSources sourceHandler) {
102
            this.sourceHandler=sourceHandler;
103
        }
104

    
105
        /**
106
         * @return the foundBibref
107
         */
108
        public boolean isFoundBibref() {
109
            return foundBibref;
110
        }
111

    
112
        /**
113
         * @param foundBibref the foundBibref to set
114
         */
115
        public void setFoundBibref(boolean foundBibref) {
116
            this.foundBibref = foundBibref;
117
        }
118

    
119

    
120
        /**
121
         * @param ref
122
         * @param refMods
123
         */
124
        public void builReference(String mref, String treatmentMainName, NomenclaturalCode nomenclaturalCode,
125
                Taxon acceptedTaxon, Reference refMods) {
126
            // System.out.println("builReference "+mref);
127
            this.setFoundBibref(true);
128

    
129
            String ref= mref;
130
            if ( (ref.endsWith(";") ||ref.endsWith(",")  ) && ((ref.length())>1)) {
131
                ref=ref.substring(0, ref.length()-1)+".";
132
            }
133
            if (ref.startsWith(treatmentMainName) && !ref.endsWith(treatmentMainName)) {
134
                ref=ref.replace(treatmentMainName, "");
135
                ref=ref.trim();
136
                while (ref.startsWith(".") || ref.startsWith(",")) {
137
                    ref=ref.replace(".","").replace(",","").trim();
138
                }
139
            }
140

    
141
            //                        logger.info("Current reference :"+nbRef+", "+ref+", "+treatmentMainName+"--"+ref.indexOf(treatmentMainName));
142
            Reference reference = ReferenceFactory.newGeneric();
143
            reference.setTitleCache(ref, true);
144

    
145
            //only add the first one if there is no nomenclatural reference yet
146
            if (nbRef==0){
147
                if(acceptedTaxon.getName().getNomenclaturalReference()==null){
148
                    acceptedTaxon.getName().setNomenclaturalReference(reference);
149
                    sourceHandler.addSource(refMods, acceptedTaxon);
150
                }
151
            }
152
            //add all other references as Feature.Citation
153
            TaxonDescription taxonDescription =importer.getTaxonDescription(acceptedTaxon, false, true);
154
            acceptedTaxon.addDescription(taxonDescription);
155
            sourceHandler.addSource(refMods, acceptedTaxon);
156

    
157
            TextData textData = TextData.NewInstance(Feature.CITATION());
158
            Language language = Language.DEFAULT();
159
            textData.putText(language, ref);
160
            sourceHandler.addSource(reference, textData,acceptedTaxon.getName(),refMods);
161
            taxonDescription.addElement(textData);
162

    
163
            sourceHandler.addSource(refMods, taxonDescription);
164

    
165
            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
166
            //                        logger.warn("BWAAHHHH: "+nameToBeFilled.getParsingProblems()+", "+ref);
167
            nbRef++;
168

    
169
        }
170

    
171
    }
172

    
173
    public class MySpecimenOrObservation{
174
        String descr="";
175
        DerivedUnit derivedUnitBase=null;
176

    
177
        public String getDescr() {
178
            return descr;
179
        }
180
        public void setDescr(String descr) {
181
            this.descr = descr;
182
        }
183
        public DerivedUnit getDerivedUnitBase() {
184
            return derivedUnitBase;
185
        }
186
        public void setDerivedUnitBase(DerivedUnit derivedUnitBase) {
187
            this.derivedUnitBase = derivedUnitBase;
188
        }
189

    
190

    
191

    
192

    
193
    }
194

    
195
    /**
196
     * @param item
197
     * @return
198
     */
199
    @SuppressWarnings({ "unused", "rawtypes" })
200
    protected MySpecimenOrObservation extractSpecimenOrObservation(Node specimenObservationNode, DerivedUnit derivedUnitBase,
201
            SpecimenOrObservationType defaultAssociation, TaxonName typifiableName) {
202
        String country=null;
203
        String locality=null;
204
        String stateprov=null;
205
        String collector=null;
206
        String fieldNumber=null;
207
        Double latitude=null,longitude=null;
208
        TimePeriod tp =null;
209
        String day,month,year="";
210
        String descr="not available";
211
        String type="";
212
        boolean asso=false;
213
        NodeList eventContent =null;
214
        // create facade
215
        DerivedUnitFacade derivedUnitFacade = null;
216

    
217
        UnitsGatheringEvent unitsGatheringEvent;
218
        UnitsGatheringArea unitsGatheringArea;
219
        DefinedTermBase areaCountry;
220

    
221
        MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
222

    
223
        NodeList xmldata= specimenObservationNode.getChildNodes();
224
        for (int n=0;n<xmldata.getLength();n++){
225
            eventContent=xmldata.item(n).getChildNodes();
226
            if (xmldata.item(n).getNodeName().equalsIgnoreCase("tax:xmldata")){
227
                asso=true;
228
                country=null;
229
                locality=null;
230
                stateprov=null;
231
                collector=null;
232
                fieldNumber=null;
233
                latitude=null;
234
                longitude=null;
235
                day="";
236
                month="";
237
                year="";
238
                type="";
239
                for (int j=0;j<eventContent.getLength();j++){
240
                    if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:country")){
241
                        country=eventContent.item(j).getTextContent().trim();
242
                    }
243
                    else if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:locality")){
244
                        locality=eventContent.item(j).getTextContent().trim();
245
                    }
246
                    else  if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:stateprovince")){
247
                        stateprov=eventContent.item(j).getTextContent().trim();
248
                    }
249
                    else  if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:collector")){
250
                        collector=eventContent.item(j).getTextContent().trim();
251
                    }
252
                    else  if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:yearcollected")){
253
                        year=eventContent.item(j).getTextContent().trim();
254
                    }
255
                    else  if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:monthcollected")){
256
                        month=eventContent.item(j).getTextContent().trim();
257
                    }
258
                    else  if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:daycollected")){
259
                        day=eventContent.item(j).getTextContent().trim();
260
                    }
261
                    else  if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:decimallongitude")){
262
                        String tmp = eventContent.item(j).getTextContent().trim();
263
                        try{longitude=Double.valueOf(tmp);}catch(Exception e){logger.warn("longitude is not a number");}
264
                    }
265
                    else  if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:decimallatitude")){
266
                        String tmp = eventContent.item(j).getTextContent().trim();
267
                        try{latitude=Double.valueOf(tmp);}catch(Exception e){logger.warn("latitude is not a number");}
268
                    }else if(eventContent.item(j).getNodeName().equalsIgnoreCase("dwc:TypeStatus")){
269
                        type = eventContent.item(j).getTextContent().trim();
270
                    }else if(eventContent.item(j).getNodeName().equalsIgnoreCase("#text") && StringUtils.isBlank(eventContent.item(j).getTextContent())){
271
                        //do nothing
272
                    }
273
                    else {
274
                        logger.info("UNEXTRACTED FIELD FOR SPECIMEN "+eventContent.item(j).getNodeName()+", "+eventContent.item(j).getTextContent()) ;
275
                    }
276
                }
277
                if (!day.isEmpty() || !month.isEmpty() || !year.isEmpty()){
278
                    try{
279
                        if (!year.isEmpty()) {
280
                            tp = TimePeriod.NewInstance(Integer.parseInt(year));
281
                            if (!month.isEmpty()) {
282
                                tp.setStartMonth(Integer.parseInt(month));
283
                                if (!day.isEmpty()) {
284
                                    tp.setStartDay(Integer.parseInt(day));
285
                                }
286
                            }
287

    
288
                        }
289
                    }catch(Exception e){
290
                        logger.warn("Collection date error "+e);
291
                    }
292
                }
293
            }
294
            if(xmldata.item(n).getNodeName().equalsIgnoreCase("#text")){
295
                descr=xmldata.item(n).getTextContent().replaceAll(";","").trim();
296
                if (descr.length()>1 && containsDistinctLetters(descr)) {
297
                    specimenOrObservation.setDescr(descr);
298
                    asso=true;
299
                }
300
            }
301
            if(xmldata.item(n).getNodeName().equalsIgnoreCase("tax:p")){
302
                descr=xmldata.item(n).getTextContent().replaceAll(";","").trim();
303
                if (descr.length()>1 && containsDistinctLetters(descr)) {
304
                    specimenOrObservation.setDescr(descr);
305
                    asso=true;
306
                }
307
            }
308
        }
309
        //        if(asso && descr.length()>1){
310

    
311
        //            logger.info("DESCR: "+descr);
312
        if (!type.isEmpty()) {
313
            if (!containsDistinctLetters(type)) {
314
                type="no description text";
315
            }
316
            derivedUnitFacade = getFacade(type.replaceAll(";",""), defaultAssociation);
317
            SpecimenTypeDesignation designation = SpecimenTypeDesignation.NewInstance();
318

    
319
            if (typifiableName != null){
320
            	typifiableName.addTypeDesignation(designation, true);
321
            }else{
322
            	logger.warn("No typifiable name available");
323
            }
324
            SpecimenTypeDesignationStatus stds= getSpecimenTypeDesignationStatusByKey(type);
325
            if (stds !=null) {
326
                stds = (SpecimenTypeDesignationStatus) importer.getTermService().find(stds.getUuid());
327
            }
328

    
329
            designation.setTypeStatus(stds);
330
            derivedUnitFacade.innerDerivedUnit().addSpecimenTypeDesignation(designation);
331

    
332
            derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
333
            // System.out.println("derivedUnitBase: "+derivedUnitBase);
334
            //                designation.setTypeSpecimen(derivedUnitBase);
335
            //                TaxonName name = taxon.getName();
336
            //                name.addTypeDesignation(designation, true);
337
        } else {
338
            if (!containsDistinctLetters(descr.replaceAll(";",""))) {
339
                descr="no description text";
340
            }
341

    
342
            derivedUnitFacade = getFacade(descr.replaceAll(";",""), defaultAssociation);
343
            derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
344
            // System.out.println("derivedUnitBase2: "+derivedUnitBase);
345
        }
346

    
347
        unitsGatheringEvent = new UnitsGatheringEvent(importer.getTermService(), locality,collector,longitude, latitude,
348
                state2.getConfig(),importer.getAgentService());
349

    
350
        if(tp!=null) {
351
            unitsGatheringEvent.setGatheringDate(tp);
352
        }
353

    
354
        // country
355
        unitsGatheringArea = new UnitsGatheringArea();
356
        unitsGatheringArea.setParams(null, country, state2.getConfig(), importer.getTermService(), importer.getOccurrenceService(), importer.getVocabularyService());
357
        //TODO other areas
358
        if (StringUtils.isNotBlank(stateprov)){
359
        	Map<String, String> namedAreas = new HashMap<String, String>();
360
        	namedAreas.put(stateprov, null);
361
            unitsGatheringArea.setAreaNames(namedAreas, state2.getConfig(), importer.getTermService(), importer.getVocabularyService());
362
        }
363

    
364
        areaCountry =  unitsGatheringArea.getCountry();
365

    
366
        //                         // other areas
367
        //                         unitsGatheringArea = new UnitsGatheringArea(namedAreaList,dataHolder.getTermService());
368
        //                         ArrayList<DefinedTermBase> nas = unitsGatheringArea.getAreas();
369
        //                         for (DefinedTermBase namedArea : nas) {
370
        //                             unitsGatheringEvent.addArea(namedArea);
371
        //                         }
372

    
373
        // copy gathering event to facade
374
        GatheringEvent gatheringEvent = unitsGatheringEvent.getGatheringEvent();
375
        derivedUnitFacade.setGatheringEvent(gatheringEvent);
376
        derivedUnitFacade.setLocality(gatheringEvent.getLocality());
377
        derivedUnitFacade.setExactLocation(gatheringEvent.getExactLocation());
378
        derivedUnitFacade.setCollector(gatheringEvent.getCollector());
379
        derivedUnitFacade.setCountry((NamedArea)areaCountry);
380

    
381
        for(DefinedTermBase<?> area:unitsGatheringArea.getAreas()){
382
            derivedUnitFacade.addCollectingArea((NamedArea) area);
383
        }
384
        //                         derivedUnitFacade.addCollectingAreas(unitsGatheringArea.getAreas());
385

    
386
        // add fieldNumber
387
        if (fieldNumber != null) {
388
            derivedUnitFacade.setFieldNumber(fieldNumber);
389
        }
390
        specimenOrObservation.setDerivedUnitBase(derivedUnitBase);
391
        //        }
392
        return specimenOrObservation;
393
    }
394

    
395

    
396
    private SpecimenTypeDesignationStatus getSpecimenTypeDesignationStatusByKey(
397
            String key) {
398
        if (key == null) {
399
            return null;
400
        } else if (key.matches("(?i)(T|Type)")) {
401
            return SpecimenTypeDesignationStatus.TYPE();
402
        } else if (key.matches("(?i)(HT|Holotype)")) {
403
            return SpecimenTypeDesignationStatus.HOLOTYPE();
404
        } else if (key.matches("(?i)(LT|Lectotype)")) {
405
            return SpecimenTypeDesignationStatus.LECTOTYPE();
406
        } else if (key.matches("(?i)(NT|Neotype)")) {
407
            return SpecimenTypeDesignationStatus.NEOTYPE();
408
        } else if (key.matches("(?i)(ST|Syntype)")) {
409
            return SpecimenTypeDesignationStatus.SYNTYPE();
410
        } else if (key.matches("(?i)(ET|Epitype)")) {
411
            return SpecimenTypeDesignationStatus.EPITYPE();
412
        } else if (key.matches("(?i)(IT|Isotype)")) {
413
            return SpecimenTypeDesignationStatus.ISOTYPE();
414
        } else if (key.matches("(?i)(ILT|Isolectotype)")) {
415
            return SpecimenTypeDesignationStatus.ISOLECTOTYPE();
416
        } else if (key.matches("(?i)(INT|Isoneotype)")) {
417
            return SpecimenTypeDesignationStatus.ISONEOTYPE();
418
        } else if (key.matches("(?i)(IET|Isoepitype)")) {
419
            return SpecimenTypeDesignationStatus.ISOEPITYPE();
420
        } else if (key.matches("(?i)(PT|Paratype)")) {
421
            return SpecimenTypeDesignationStatus.PARATYPE();
422
        } else if (key.matches("(?i)(PLT|Paralectotype)")) {
423
            return SpecimenTypeDesignationStatus.PARALECTOTYPE();
424
        } else if (key.matches("(?i)(PNT|Paraneotype)")) {
425
            return SpecimenTypeDesignationStatus.PARANEOTYPE();
426
        } else if (key.matches("(?i)(unsp.|Unspecified)")) {
427
            return SpecimenTypeDesignationStatus.UNSPECIFIC();
428
        } else if (key.matches("(?i)(2LT|Second Step Lectotype)")) {
429
            return SpecimenTypeDesignationStatus.SECOND_STEP_LECTOTYPE();
430
        } else if (key.matches("(?i)(2NT|Second Step Neotype)")) {
431
            return SpecimenTypeDesignationStatus.SECOND_STEP_NEOTYPE();
432
        } else if (key.matches("(?i)(OM|Original Material)")) {
433
            return SpecimenTypeDesignationStatus.ORIGINAL_MATERIAL();
434
        } else if (key.matches("(?i)(IcT|Iconotype)")) {
435
            return SpecimenTypeDesignationStatus.ICONOTYPE();
436
        } else if (key.matches("(?i)(PT|Phototype)")) {
437
            return SpecimenTypeDesignationStatus.PHOTOTYPE();
438
        } else if (key.matches("(?i)(IST|Isosyntype)")) {
439
            return SpecimenTypeDesignationStatus.ISOSYNTYPE();
440
        } else {
441
            return null;
442
        }
443
    }
444
    protected DerivedUnitFacade getFacade(String recordBasis, SpecimenOrObservationType defaultAssoc) {
445
        // System.out.println("getFacade() for "+recordBasis+", defaultassociation: "+defaultAssoc);
446
        SpecimenOrObservationType type = null;
447

    
448
        // create specimen
449
        if (recordBasis != null) {
450
            String recordBasisL = recordBasis.toLowerCase();
451
            if (recordBasisL.startsWith("specimen") || recordBasisL.contains("specimen") || recordBasisL.contains("type")) {// specimen
452
                type = SpecimenOrObservationType.PreservedSpecimen;
453
            }
454
            if (recordBasisL.startsWith("observation")) {
455
                type = SpecimenOrObservationType.Observation;
456
            }
457
            if (recordBasisL.contains("fossil")) {
458
                type = SpecimenOrObservationType.Fossil;
459
            }
460

    
461
            if (recordBasisL.startsWith("living")) {
462
                type = SpecimenOrObservationType.LivingSpecimen;
463
            }
464
            if (type == null) {
465
                logger.info("The basis of record does not seem to be known: *" + recordBasisL+"*");
466
                type = defaultAssoc;
467
            }
468
            // TODO fossils?
469
        } else {
470
            logger.info("The basis of record is null");
471
            type = defaultAssoc;
472
        }
473
        DerivedUnitFacade derivedUnitFacade = DerivedUnitFacade.NewInstance(type);
474
        return derivedUnitFacade;
475
    }
476

    
477

    
478

    
479
    @SuppressWarnings("rawtypes")
480
    protected Feature makeFeature(SpecimenOrObservationBase unit) {
481
        if (unit == null){
482
            return null;
483
        }
484
        SpecimenOrObservationType type = unit.getRecordBasis();
485

    
486
        if (type.isFeatureObservation()){
487
            return Feature.OBSERVATION();
488
        }else if (type.isPreservedSpecimen() ||
489
                type == SpecimenOrObservationType.LivingSpecimen ||
490
                type == SpecimenOrObservationType.OtherSpecimen
491
                ){
492
            return Feature.SPECIMEN();
493
        }else if (type == SpecimenOrObservationType.Unknown ||
494
                type == SpecimenOrObservationType.DerivedUnit
495
                ) {
496
            return Feature.INDIVIDUALS_ASSOCIATION();
497
        }
498
        logger.warn("No feature defined for derived unit class: "
499
                + unit.getClass().getSimpleName());
500
        return null;
501
    }
502

    
503

    
504
    /** Comma separator constant; where it is used is not visible in this part of the file. */
    protected final static String SPLITTER = ",";
505

    
506

    
507
    protected  int askQuestion(String question){
508
        Scanner scan = new Scanner(System.in);
509
        logger.info(question);
510
        int index = scan.nextInt();
511
        return index;
512
    }
513

    
514

    
515
    /**
516
     * @param reftype
517
     * @return
518
     */
519
    protected Reference getReferenceWithType(int reftype) {
520
        Reference ref = null;
521
        switch (reftype) {
522
        case 1:
523
            ref = ReferenceFactory.newGeneric();
524
            break;
525
        case 2:
526
            IBook tmp= ReferenceFactory.newBook();
527
            ref = (Reference)tmp;
528
            break;
529
        case 3:
530
            ref = ReferenceFactory.newArticle();
531
            break;
532
        case 4:
533
            IBookSection tmp2 = ReferenceFactory.newBookSection();
534
            ref = (Reference)tmp2;
535
            break;
536
        case 5:
537
            ref = ReferenceFactory.newJournal();
538
            break;
539
        case 6:
540
            ref = ReferenceFactory.newPrintSeries();
541
            break;
542
        case 7:
543
            ref = ReferenceFactory.newThesis();
544
            break;
545
        default:
546
            break;
547
        }
548
        return ref;
549
    }
550
    /**
551
     * @param unitsList
552
     * @param state
553
     */
554
    protected void prepareCollectors(TaxonXImportState state,IAgentService agentService) {
555
        //        logger.info("PREPARE COLLECTORS");
556
        List<String> collectors = new ArrayList<String>();
557
        String tmp;
558
        List<String> collectorsU = new ArrayList<String>(new HashSet<String>(collectors));
559
        Set<UUID> uuids = new HashSet<UUID>();
560

    
561
        //existing persons in DB
562
        List<UuidAndTitleCache<Person>> hiberPersons = agentService.getPersonUuidAndTitleCache();
563
        Map<String,Person> titleCachePerson = new HashMap<String, Person>();
564
        uuids = new HashSet<UUID>();
565
        for (UuidAndTitleCache<Person> hibernateP:hiberPersons){
566
            uuids.add(hibernateP.getUuid());
567
        }
568

    
569
        if (!uuids.isEmpty()){
570
            List<AgentBase> existingPersons = agentService.find(uuids);
571
            for (AgentBase existingP:existingPersons){
572
                titleCachePerson.put(existingP.getTitleCache(),CdmBase.deproxy(existingP, Person.class));
573
            }
574
        }
575

    
576
        Map<String,UUID> personMap = new HashMap<String, UUID>();
577
        for (UuidAndTitleCache<Person> person:hiberPersons){
578
            personMap.put(person.getTitleCache(), person.getUuid());
579
        }
580

    
581
        java.util.Collection<AgentBase> personToadd = new ArrayList<AgentBase>();
582

    
583
        for (String collector:collectorsU){
584
            Person p = Person.NewInstance();
585
            p.setTitleCache(collector,true);
586
            if (!personMap.containsKey(p.getTitleCache())){
587
                personToadd.add(p);
588
            }
589
        }
590

    
591
        if(!personToadd.isEmpty()){
592
            Map<UUID, AgentBase> uuuidPerson = agentService.save(personToadd);
593
            for (UUID u:uuuidPerson.keySet()){
594
                titleCachePerson.put(uuuidPerson.get(u).getTitleCache(), CdmBase.deproxy(uuuidPerson.get(u), Person.class));
595
            }
596
        }
597

    
598
        state.getConfig().setPersons(titleCachePerson);
599
    }
600

    
601
    /**
602
     * @param name
603
     * @return
604
     */
605
    protected String getFullReference(String name, List<ParserProblem> problems) {
606
        //        logger.info("getFullReference for "+ name);
607
        JTextArea textArea = new JTextArea("Complete the reference or the name '"+name+"'.\nThe current problem is "+StringUtils.join(problems,"--"));
608
        JScrollPane scrollPane = new JScrollPane(textArea);
609
        textArea.setLineWrap(true);
610
        textArea.setWrapStyleWord(true);
611
        scrollPane.setPreferredSize( new Dimension( 700, 70 ) );
612

    
613
        //        JFrame frame = new JFrame("I have a question");
614
        //        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
615
        String s = (String)JOptionPane.showInputDialog(
616
                null,
617
                scrollPane,
618
                "Get full reference or name",
619
                JOptionPane.PLAIN_MESSAGE,
620
                null,
621
                null,
622
                name);
623
        return s;
624
    }
625

    
626

    
627

    
628
    /**
629
     * @param name
630
     * @return
631
     * @throws TransformerException
632
     * @throws TransformerFactoryConfigurationError
633
     */
634
    protected String askWhichScientificName(String fullname,String atomised,String classificationName, Node fullParagraph) throws TransformerFactoryConfigurationError, TransformerException {
635
        //        logger.info("getScientificName for "+ fullname);
636
        //        JFrame frame = new JFrame("I have a question");
637
        //        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
638
        String k = fullname+"_"+atomised;
639

    
640
        String defaultN = "";
641
        if (atomised.length()>fullname.length()) {
642
            defaultN=atomised;
643
        } else {
644
            defaultN=fullname;
645
        }
646

    
647
        if (namesAsked.containsKey(k)){
648
            return namesAsked.get(k);
649
        }
650
        else{
651
            //activate it for ants because a lot of markup is incomplete
652
            if (classificationName.indexOf("Ants")>-1) {
653
                return defaultN;
654
            }
655

    
656
            JTextArea textArea = new JTextArea("The names in the free text and in the xml tags do not match : "+fullname+
657
                    ", or "+atomised+"\n"+formatNode(fullParagraph));
658
            JScrollPane scrollPane = new JScrollPane(textArea);
659
            textArea.setLineWrap(true);
660
            textArea.setWrapStyleWord(true);
661
            scrollPane.setPreferredSize( new Dimension( 700, 200 ) );
662
            String s = (String)JOptionPane.showInputDialog(
663
                    null,
664
                    scrollPane,
665
                    "Which name do I have to use? The current classification is "+classificationName,
666
                    JOptionPane.PLAIN_MESSAGE,
667
                    null,
668
                    null,
669
                    defaultN);
670
            namesAsked.put(k, s);
671
            return s;
672
        }
673
    }
674

    
675

    
676
    protected int askAddParent(String s){
677
        //        boolean hack=true;
678
        //        if (hack) {
679
        //            return 1;
680
        //        }
681
        JTextArea textArea = new JTextArea("If you want to add a parent taxa for "+s+", click \"Yes\"." +
682
                " If it is a root for this classification, click \"No\" or \"Cancel\".");
683
        JScrollPane scrollPane = new JScrollPane(textArea);
684
        textArea.setLineWrap(true);
685
        textArea.setWrapStyleWord(true);
686
        scrollPane.setPreferredSize( new Dimension( 600, 70 ) );
687

    
688
        Object[] options = { UIManager.getString("OptionPane.yesButtonText"),
689
                UIManager.getString("OptionPane.noButtonText")};
690

    
691

    
692
        int addTaxon = JOptionPane.showOptionDialog(null,
693
                scrollPane,
694
                "",
695
                JOptionPane.YES_NO_OPTION,
696
                0,
697
                null,
698
                options,
699
                options[1]);
700
        return addTaxon;
701
    }
702

    
703
    protected String askSetParent(String s){
704
        JTextArea textArea =  new JTextArea("What is the first taxon parent for "+s+"?\n"+
705
                "The rank will be asked later. ");
706
        JScrollPane scrollPane = new JScrollPane(textArea);
707
        textArea.setLineWrap(true);
708
        textArea.setWrapStyleWord(true);
709
        scrollPane.setPreferredSize( new Dimension( 700, 200 ) );
710

    
711
        String s2 = (String)JOptionPane.showInputDialog(
712
                null,
713
                scrollPane,
714
                "",
715
                JOptionPane.PLAIN_MESSAGE,
716
                null,
717
                null,
718
                s);
719
        return s2;
720
    }
721

    
722
    protected String askRank(String s, List<String> rankListStr){
723
        JTextArea  textArea = new JTextArea("What is the rank for "+s+"?");
724
        JScrollPane scrollPane = new JScrollPane(textArea);
725
        textArea.setLineWrap(true);
726
        textArea.setWrapStyleWord(true);
727
        scrollPane.setPreferredSize( new Dimension( 700, 200 ) );
728

    
729
        String r = (String)JOptionPane.showInputDialog(
730
                null,
731
                scrollPane,
732
                "",
733
                JOptionPane.PLAIN_MESSAGE,
734
                null,
735
                rankListStr.toArray(),
736
                null);
737
        return r;
738
    }
739

    
740
    /**
741
     * @param name
742
     * @return
743
     * @throws TransformerException
744
     * @throws TransformerFactoryConfigurationError
745
     */
746
    protected String askFeatureName(String paragraph){
747
        //        logger.info("getScientificName for "+ fullname);
748
        //        JFrame frame = new JFrame("I have a question");
749
        //        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
750
        JTextArea textArea = new JTextArea("How should the feature be named? \n"+paragraph);
751
        JScrollPane scrollPane = new JScrollPane(textArea);
752
        textArea.setLineWrap(true);
753
        textArea.setWrapStyleWord(true);
754
        scrollPane.setPreferredSize( new Dimension( 700, 200 ) );
755
        String s = (String)JOptionPane.showInputDialog(
756
                null,
757
                scrollPane,
758
                "",
759
                JOptionPane.PLAIN_MESSAGE,
760
                null,
761
                null,
762
                "Other");
763
        return s;
764
    }
765

    
766
    /**
767
     * @param taxonname2
768
     * @param bestMatchingTaxon
769
     * @param refMods
770
     * @param similarityAuthor
771
     * @return
772
     */
773
    protected boolean askIfReuseBestMatchingTaxon(INonViralName taxonname2, Taxon bestMatchingTaxon, Reference refMods, double similarityScore, double similarityAuthor) {
774
        Object[] options = { UIManager.getString("OptionPane.yesButtonText"),
775
                UIManager.getString("OptionPane.noButtonText")};
776

    
777
        if (similarityScore<0.66 &&  similarityAuthor<0.5) {
778
            return false;
779
            //            System.out.println("should say NO");
780
        }
781

    
782
        boolean sameSource=false;
783
        boolean noRef=false;
784

    
785
        String sec = refMods.getTitleCache();
786
        String secBest = "";
787
        try{
788
            secBest=bestMatchingTaxon.getSec().getTitleCache();
789
        }
790
        catch(NullPointerException e){
791
            logger.warn("no sec - ignore");
792
        }
793

    
794
        if (secBest.isEmpty()) {
795
            noRef=true;
796
        }
797

    
798
        Object defaultOption=options[1];
799
        if(sec.equalsIgnoreCase(secBest)
800
                //                ||                taxonname2.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(bestMatchingTaxon.getTitleCache().split("sec.")[0].trim())
801
                ) {
802
            //System.out.println(sec+" and "+secBest);
803
            sameSource=true;
804
            //-1 <=> no author
805
            if (similarityScore>0.65 && (similarityAuthor==-1 || similarityAuthor>0.8)) {
806
                defaultOption=options[0];
807
            } else {
808
                defaultOption=options[1];
809
            }
810
        } else {
811
            if (similarityScore>0.65 && similarityAuthor>0.8) {
812
                if(similarityScore==1 ) {
813
                    return true;
814
                }
815
                defaultOption=options[0];
816
            } else {
817
                defaultOption=options[1];
818
            }
819
        }
820

    
821
        String sourcesStr="";
822

    
823
        Set<IdentifiableSource> sources = bestMatchingTaxon.getSources();
824
        for (IdentifiableSource src:sources){
825
            try{
826
                String srcSec=src.getCitation().getTitleCache();
827
                if(!srcSec.isEmpty()){
828
                    sourcesStr+="\n "+srcSec;
829
                    if (srcSec.equalsIgnoreCase(sec)){
830
                        sameSource=true;
831
                        if (similarityScore>0.65 && similarityAuthor>0.8) {
832
                            defaultOption=options[0];
833
                        } else {
834
                            defaultOption=options[1];
835
                        }
836
                    }
837
                }
838
            }catch(Exception e){
839
                logger.warn("the source reference is maybe null, just ignore it.");
840
            }
841
        }
842

    
843
        if (sameSource && similarityScore>0.9999 && (similarityAuthor==-1 || similarityAuthor>0.8)) {
844
            return true;
845
        }
846
        if(similarityScore<0.66) {
847
            defaultOption=options[1];
848
        }
849

    
850
        //        //only activate it if you know the data you are importing (ok for Chenopodium)
851
        if(defaultOption==options[1]) {
852
            return false;
853
        }
854

    
855
        JTextArea textArea =null;
856
        if (!sourcesStr.isEmpty()) {
857
            textArea = new JTextArea("Does "+taxonname2.toString()+" correspond to "
858
                    + bestMatchingTaxon.toString()+" ?\n Click \"Yes\". if it does, click \"No\" if it does not."
859
                    + "\n The current sources are:"+ sourcesStr);
860
        } else {
861
            textArea = new JTextArea("Does "+taxonname2.toString()+" correspond to "
862
                    + bestMatchingTaxon.toString()+" ?\n Click \"Yes\". if it does, click \"No\" if it does not.");
863
        }
864
        JScrollPane scrollPane = new JScrollPane(textArea);
865
        textArea.setLineWrap(true);
866
        textArea.setWrapStyleWord(true);
867
        scrollPane.setPreferredSize( new Dimension( 600, 70 ) );
868

    
869
        int addTaxon = JOptionPane.showOptionDialog(null,
870
                scrollPane,
871
                refMods.toString(),
872
                JOptionPane.YES_NO_OPTION,
873
                0,
874
                null,
875
                options,
876
                defaultOption);
877
        if(addTaxon==1) {
878
            return false;
879
        } else {
880
            return true;
881
        }
882
    }
883

    
884
    /**
885
     * @param fullLineRefName
886
     * @return
887
     */
888
    protected int askIfNameContained(String fullLineRefName) {
889

    
890
        JTextArea textArea = new JTextArea("Is a scientific name contained in this sentence ? Type 0 if contains a name, 1 if it's only a reference. Press 2 if it's to be ignored \n"+fullLineRefName);
891
        JScrollPane scrollPane = new JScrollPane(textArea);
892
        textArea.setLineWrap(true);
893
        textArea.setWrapStyleWord(true);
894
        scrollPane.setPreferredSize( new Dimension( 600, 400 ) );
895

    
896
        String s = (String)JOptionPane.showInputDialog(
897
                null,
898
                scrollPane,
899
                "",
900
                JOptionPane.PLAIN_MESSAGE,
901
                null,
902
                null,
903
                "0");
904
        return Integer.valueOf(s);
905
    }
906

    
907

    
908
    /**
909
     * @param name
910
     * @return
911
     */
912
    protected Rank askForRank(String fullname,Rank rank, NomenclaturalCode nomenclaturalCode) {
913
        //        logger.info("askForRank for "+ fullname+ ", "+rank);
914
        //        JFrame frame = new JFrame("I have a question");
915
        //        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
916

    
917
        if (ranksAsked.containsKey(fullname)){
918
            return ranksAsked.get(fullname);
919
        }
920
        else{
921
            boolean np=false;
922
            int npi=0;
923
            Rank cR = null;
924

    
925
            while (!np && npi<2)
926
            {
927

    
928

    
929
                JTextArea textArea = new JTextArea("What is the correct rank for "+fullname+"?");
930
                JScrollPane scrollPane = new JScrollPane(textArea);
931
                textArea.setLineWrap(true);
932
                textArea.setWrapStyleWord(true);
933
                scrollPane.setPreferredSize( new Dimension( 600, 50 ) );
934

    
935
                List<Rank> rankList = new ArrayList<Rank>();
936
                rankList = importer.getTermService().list(Rank.class, null, null, null, null);
937

    
938
                List<String> rankListStr = new ArrayList<String>();
939
                for (Rank r:rankList) {
940
                    rankListStr.add(r.toString());
941
                }
942
                String s = (String)JOptionPane.showInputDialog(
943
                        null,
944
                        scrollPane,
945
                        "The rank extracted from the TaxonX file is "+rank.toString(),
946
                        JOptionPane.PLAIN_MESSAGE,
947
                        null,
948
                        rankListStr.toArray(),
949
                        rank.toString());
950

    
951

    
952
                try {
953
                    npi++;
954
                    cR = Rank.getRankByEnglishName(s,nomenclaturalCode,true);
955
                    np=true;
956
                } catch (UnknownCdmTypeException e) {
957
                    logger.warn("Unknown rank ?!"+s);
958
                    logger.warn(e);
959
                }
960
            }
961
            ranksAsked.put(fullname,cR);
962
            return cR;
963

    
964
        }
965
    }
966

    
967
    /**
968
     * ask user to specify what kind of paragraph the current "multiple" section is
969
     * default possibilities are "synonyms","material examined","distribution","image caption","other"
970
     * could make sense to replace this list with the CDM-Feature list
971
     * if "other" is selected, a second pop-up will be prompted to ask user to specify a new Feature name.
972
     * @param fullParagraph : the current Node
973
     * @return the section name
974
     * */
975
    protected String askMultiple(Node fullParagraph){
976
        String fp = "";
977
        try {
978
            fp = formatNode(fullParagraph);
979
        } catch (TransformerFactoryConfigurationError e1) {
980
            // TODO Auto-generated catch block
981
            e1.printStackTrace();
982
        } catch (TransformerException e1) {
983
            // TODO Auto-generated catch block
984
            e1.printStackTrace();
985
        }
986
        JTextArea textArea = new JTextArea("What category is it for this paragraph \n"+fp);
987
        JScrollPane scrollPane = new JScrollPane(textArea);
988
        textArea.setLineWrap(true);
989
        textArea.setWrapStyleWord(true);
990
        scrollPane.setPreferredSize( new Dimension( 600, 400 ) );
991

    
992
        String[] possiblities = {"synonyms","material examined","distribution","image caption","Other","vernacular name","type status","new category"};
993

    
994

    
995
        String s = (String)JOptionPane.showInputDialog(
996
                null,
997
                scrollPane,
998
                "",
999
                JOptionPane.PLAIN_MESSAGE,
1000
                null,
1001
                possiblities,
1002
                "Other");
1003

    
1004
        if (s.equalsIgnoreCase("new category")) {
1005
            try {
1006
                s=askFeatureName(formatNode(fullParagraph));
1007
            } catch (TransformerFactoryConfigurationError e) {
1008
                logger.warn(e);
1009
            } catch (TransformerException e) {
1010
                logger.warn(e);
1011
            }
1012
        }
1013
        return s;
1014

    
1015
    }
1016

    
1017

    
1018

    
1019
    /**
1020
     * asks for the hierarchical parent, based on the current classification
1021
     * @param taxon
1022
     * @param classification
1023
     * @return Taxon, the parent Taxon
1024
     */
1025
    protected Taxon askParent(Taxon taxon,Classification classification ) {
1026
        // System.out.println("ASK PARENT "+classification);
1027
        //        logger.info("ask Parent "+taxon.getTitleCache());
1028
        Set<TaxonNode> allNodes = classification.getAllNodes();
1029
        Map<String,Taxon> nodesMap = new HashMap<String, Taxon>();
1030

    
1031
        for (TaxonNode tn:allNodes){
1032
            Taxon t = tn.getTaxon();
1033
            nodesMap.put(t.getTitleCache(), t);
1034
        }
1035
        List<String> nodeList = new ArrayList<String>();
1036
        for (String nl : nodesMap.keySet()) {
1037
            nodeList.add(nl+" - "+nodesMap.get(nl).getName().getRank());
1038
        }
1039
        Collections.sort(nodeList);
1040
        nodeList.add(0, "Not here!");
1041

    
1042
        JFrame frame = new JFrame("I have a question");
1043
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
1044
        String s = (String)JOptionPane.showInputDialog(
1045
                frame,
1046
                "What is the taxon parent for "+taxon.getTitleCache()+"?",
1047
                "The current classification is "+classification.getTitleCache(),
1048
                JOptionPane.PLAIN_MESSAGE,
1049
                null,
1050
                nodeList.toArray(),
1051
                "Not here!");
1052

    
1053
        Taxon returnTaxon = nodesMap.get(s.split(" - ")[0]);
1054
        //        logger.info("ask Parent returns "+s);
1055
        return returnTaxon;
1056
    }
1057

    
1058

    
1059
    /**
1060
     *
1061
     * @param r: the rank as string (with dwc tags)
1062
     * @return Rank : the Rank object corresponding to the current string
1063
     *
1064
     */
1065
    protected Rank getRank(String r){
1066
        if (r==null) {
1067
            r=Rank.UNKNOWN_RANK().toString();
1068
        }
1069
        r=r.replace("dwcranks:", "");
1070
        r =r.replace("dwc:","");
1071

    
1072
        Rank rank = Rank.UNKNOWN_RANK();
1073
        if (r.equalsIgnoreCase("Superfamily")) {
1074
            rank=Rank.SUPERFAMILY();
1075
        }
1076
        else if (r.equalsIgnoreCase("Family")) {
1077
            rank=Rank.FAMILY();
1078
        }
1079
        else if (r.equalsIgnoreCase("Subfamily")) {
1080
            rank=Rank.SUBFAMILY();
1081
        }
1082
        else if (r.equalsIgnoreCase("Tribe")) {
1083
            rank=Rank.TRIBE();
1084
        }
1085
        else if (r.equalsIgnoreCase("Subtribe")) {
1086
            rank=Rank.SUBTRIBE();
1087
        }
1088
        else if (r.equalsIgnoreCase("Genus")) {
1089
            rank=Rank.GENUS();
1090
        }
1091
        else if (r.equalsIgnoreCase("Subgenus")) {
1092
            rank=Rank.SUBGENUS();
1093
        }
1094
        else if (r.equalsIgnoreCase("Section")) {
1095
            rank=Rank.SECTION_BOTANY();
1096
        }
1097
        else if (r.equalsIgnoreCase("Subsection")) {
1098
            rank=Rank.SUBSECTION_BOTANY();
1099
        }
1100
        else if (r.equalsIgnoreCase("Series")) {
1101
            rank=Rank.SERIES();
1102
        }
1103
        else if (r.equalsIgnoreCase("Subseries")) {
1104
            rank=Rank.SUBSERIES();
1105
        }
1106
        else if (r.equalsIgnoreCase("Species")) {
1107
            rank=Rank.SPECIES();
1108
        }
1109
        else if (r.equalsIgnoreCase("Subspecies")) {
1110
            rank=Rank.SUBSPECIES();
1111
        }
1112
        else if (r.equalsIgnoreCase("Variety") || r.equalsIgnoreCase("varietyEpithet")) {
1113
            rank=Rank.VARIETY();
1114
        }
1115
        else if (r.equalsIgnoreCase("Subvariety")) {
1116
            rank=Rank.SUBVARIETY();
1117
        }
1118
        else if (r.equalsIgnoreCase("Form")) {
1119
            rank=Rank.FORM();
1120
        }
1121
        else if (r.equalsIgnoreCase("Subform")) {
1122
            rank=Rank.SUBFORM();
1123
        }else if (r.equalsIgnoreCase("higher")) {
1124
//            rank=Rank.SUPRAGENERICTAXON();
1125
        	logger.warn("handling of 'higher' rank still unclear");
1126
        }
1127

    
1128
        return rank;
1129
    }
1130

    
1131

    
1132
    /**
1133
     * @param ato: atomised taxon name data
1134
     * @return rank present in the xmldata fields
1135
     */
1136
    protected Rank getRank(Map<String, String> ato) {
1137
        Rank rank=Rank.UNKNOWN_RANK();
1138

    
1139
        if (ato == null) {
1140
            return rank;
1141
        }
1142
        if (ato.containsKey("dwc:family")){
1143
            rank=Rank.FAMILY();
1144
        }
1145
        if (ato.containsKey("dwc:tribe") || ato.containsKey("dwcranks:tribe")){
1146
            rank=Rank.TRIBE();
1147
        }
1148
        if (ato.containsKey("dwc:genus")) {
1149
            rank= Rank.GENUS();
1150
        }
1151
        if (ato.containsKey("dwc:subgenus")) {
1152
            rank= Rank.SUBGENUS();
1153
        }
1154
        if (ato.containsKey("dwc:specificepithet") || ato.containsKey("dwc:species")) {
1155
            rank= Rank.SPECIES();
1156
        }
1157
        if (ato.containsKey("dwc:infraspecificepithet")) {
1158
            rank= Rank.INFRASPECIES();
1159
        }
1160
        if (ato.containsKey("dwcranks:varietyepithet")) {
1161
            rank=Rank.VARIETY();
1162
        }
1163
        //popUp(rank.getTitleCache());
1164
        return rank;
1165
    }
1166

    
1167
    /**
1168
     * Format a XML node for a clean (screen) output with tags
1169
     * @param Node : the node to format
1170
     * @return String : the XML section formated for a screen output
1171
     * */
1172

    
1173
    protected String formatNode(Node node) throws TransformerFactoryConfigurationError, TransformerException{
1174
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
1175
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
1176
        //initialize StreamResult with File object to save to file
1177
        StreamResult result = new StreamResult(new StringWriter());
1178
        DOMSource source = new DOMSource(node);
1179
        transformer.transform(source, result);
1180
        String xmlString = result.getWriter().toString();
1181
        return xmlString;
1182
    }
1183

    
1184
    protected boolean containsDistinctLetters(String word){
1185
        Set<Character> dl = new HashSet<Character>();
1186
        for (char a: word.toCharArray()) {
1187
            dl.add(a);
1188
        }
1189
        if(dl.size()>1 && word.indexOf("no description text")==-1) {
1190
            return true;
1191
        } else {
1192
            return false;
1193
        }
1194
    }
1195

    
1196
    /**
1197
     * Tries to match the status string against any new name status
1198
     * and returns the status if it matches. Returns <code>null</code> otherwise.
1199
     * @param status
1200
     * @return
1201
     */
1202
    protected String newNameStatus(String status){
1203
    	String pattern = "(" + "((sp|spec|gen|comb|)\\.\\s*nov.)" +
1204
    				"|(new\\s*(species|combination))" +
1205
    				"|(n\\.\\s*sp\\.)" +
1206
    				"|(sp\\.\\s*n\\.)" +
1207
    				")";
1208
    	if (status.trim().matches(pattern)){
1209
    		//FIXME
1210
    		return null;
1211
//    		return status;
1212
    	}else{
1213
    		return null;
1214
    	}
1215
    }
1216

    
1217

    
1218
    /** Creates an cdm-NomenclaturalCode by the tcs NomenclaturalCode
1219
     */
1220
    protected NomenclaturalStatusType nomStatusString2NomStatus (String nomStatus) throws UnknownCdmTypeException{
1221

    
1222
        if (nomStatus == null){ return null;
1223
        }else if ("Valid".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.VALID();
1224

    
1225
        }else if ("Alternative".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.ALTERNATIVE();
1226
        }else if ("nom. altern.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.ALTERNATIVE();
1227

    
1228
        }else if ("Ambiguous".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.AMBIGUOUS();
1229

    
1230
        }else if ("Doubtful".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.DOUBTFUL();
1231

    
1232
        }else if ("Confusum".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.CONFUSUM();
1233

    
1234
        }else if ("Illegitimate".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.ILLEGITIMATE();
1235
        }else if ("nom. illeg.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.ILLEGITIMATE();
1236

    
1237
        }else if ("Superfluous".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.SUPERFLUOUS();
1238
        }else if ("nom. superfl.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.SUPERFLUOUS();
1239

    
1240
        }else if ("Rejected".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.REJECTED();
1241
        }else if ("nom. rej.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.REJECTED();
1242

    
1243
        }else if ("Utique Rejected".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.UTIQUE_REJECTED();
1244

    
1245
        }else if ("Conserved Prop".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.CONSERVED_PROP();
1246

    
1247
        }else if ("Orthography Conserved Prop".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.ORTHOGRAPHY_CONSERVED_PROP();
1248

    
1249
        }else if ("Legitimate".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.LEGITIMATE();
1250

    
1251
        }else if ("Novum".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.NOVUM();
1252
        }else if ("nom. nov.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.NOVUM();
1253

    
1254
        }else if ("Utique Rejected Prop".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.UTIQUE_REJECTED_PROP();
1255

    
1256
        }else if ("Orthography Conserved".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.ORTHOGRAPHY_CONSERVED();
1257

    
1258
        }else if ("Rejected Prop".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.REJECTED_PROP();
1259

    
1260
        }else if ("Conserved".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.CONSERVED();
1261
        }else if ("nom. cons.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.CONSERVED();
1262

    
1263
        }else if ("Sanctioned".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.SANCTIONED();
1264

    
1265
        }else if ("Invalid".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.INVALID();
1266
        }else if ("nom. inval.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.INVALID();
1267

    
1268
        }else if ("Nudum".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.NUDUM();
1269
        }else if ("nom. nud.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.NUDUM();
1270

    
1271
        }else if ("Combination Invalid".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.COMBINATION_INVALID();
1272

    
1273
        }else if ("Provisional".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.PROVISIONAL();
1274
        }else if ("nom. provis.".equalsIgnoreCase(nomStatus)){return NomenclaturalStatusType.PROVISIONAL();
1275
        }
1276
        else {
1277
            throw new UnknownCdmTypeException("Unknown Nomenclatural status type " + nomStatus);
1278
        }
1279
    }
1280

    
1281

    
1282
    //TypeDesignation
1283
    protected  SpecimenTypeDesignationStatus typeStatusId2TypeStatus (int typeStatusId)  throws UnknownCdmTypeException{
1284
        switch (typeStatusId){
1285
        case 0: return null;
1286
        case 1: return SpecimenTypeDesignationStatus.HOLOTYPE();
1287
        case 2: return SpecimenTypeDesignationStatus.LECTOTYPE();
1288
        case 3: return SpecimenTypeDesignationStatus.NEOTYPE();
1289
        case 4: return SpecimenTypeDesignationStatus.EPITYPE();
1290
        case 5: return SpecimenTypeDesignationStatus.ISOLECTOTYPE();
1291
        case 6: return SpecimenTypeDesignationStatus.ISONEOTYPE();
1292
        case 7: return SpecimenTypeDesignationStatus.ISOTYPE();
1293
        case 8: return SpecimenTypeDesignationStatus.PARANEOTYPE();
1294
        case 9: return SpecimenTypeDesignationStatus.PARATYPE();
1295
        case 10: return SpecimenTypeDesignationStatus.SECOND_STEP_LECTOTYPE();
1296
        case 11: return SpecimenTypeDesignationStatus.SECOND_STEP_NEOTYPE();
1297
        case 12: return SpecimenTypeDesignationStatus.SYNTYPE();
1298
        case 21: return SpecimenTypeDesignationStatus.ICONOTYPE();
1299
        case 22: return SpecimenTypeDesignationStatus.PHOTOTYPE();
1300
        default: {
1301
            throw new UnknownCdmTypeException("Unknown TypeDesignationStatus (id=" + Integer.valueOf(typeStatusId).toString() + ")");
1302
        }
1303
        }
1304
    }
1305

    
1306

    
1307
}
1308

    
1309

    
(3-3/9)