Project

General

Profile

Download (231 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
 * Copyright (C) 2013 EDIT
3
 * European Distributed Institute of Taxonomy
4
 * http://www.e-taxonomy.eu
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9
package eu.etaxonomy.cdm.io.taxonx2013;
10

    
11
import java.io.BufferedWriter;
12
import java.io.File;
13
import java.io.FileWriter;
14
import java.io.IOException;
15
import java.util.ArrayList;
16
import java.util.Arrays;
17
import java.util.HashMap;
18
import java.util.List;
19
import java.util.Map;
20
import java.util.Set;
21
import java.util.UUID;
22
import java.util.regex.Matcher;
23
import java.util.regex.Pattern;
24

    
25
import javax.xml.transform.TransformerException;
26
import javax.xml.transform.TransformerFactoryConfigurationError;
27

    
28
import org.apache.commons.lang.StringUtils;
29
import org.apache.log4j.Logger;
30
import org.w3c.dom.Node;
31
import org.w3c.dom.NodeList;
32

    
33
import com.ibm.lsid.MalformedLSIDException;
34

    
35
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
36
import eu.etaxonomy.cdm.api.service.pager.Pager;
37
import eu.etaxonomy.cdm.common.URI;
38
import eu.etaxonomy.cdm.model.agent.AgentBase;
39
import eu.etaxonomy.cdm.model.agent.Person;
40
import eu.etaxonomy.cdm.model.common.CdmBase;
41
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
42
import eu.etaxonomy.cdm.model.common.LSID;
43
import eu.etaxonomy.cdm.model.common.Language;
44
import eu.etaxonomy.cdm.model.description.Feature;
45
import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
46
import eu.etaxonomy.cdm.model.description.TaxonDescription;
47
import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
48
import eu.etaxonomy.cdm.model.description.TextData;
49
import eu.etaxonomy.cdm.model.name.INonViralName;
50
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
51
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
52
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
53
import eu.etaxonomy.cdm.model.name.Rank;
54
import eu.etaxonomy.cdm.model.name.TaxonName;
55
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
56
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
57
import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
58
import eu.etaxonomy.cdm.model.reference.Reference;
59
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
60
import eu.etaxonomy.cdm.model.taxon.Classification;
61
import eu.etaxonomy.cdm.model.taxon.Synonym;
62
import eu.etaxonomy.cdm.model.taxon.SynonymType;
63
import eu.etaxonomy.cdm.model.taxon.Taxon;
64
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
65
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
66
import eu.etaxonomy.cdm.model.term.TermNode;
67
import eu.etaxonomy.cdm.model.term.TermTree;
68
import eu.etaxonomy.cdm.persistence.dto.UuidAndTitleCache;
69
import eu.etaxonomy.cdm.persistence.query.MatchMode;
70
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
71
import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
72
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
73
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImplRegExBase;
74

    
75
/**
76
 * @author pkelbert
77
 * @since 2 avr. 2013
78
 *
79
 */
80
public class TaxonXTreatmentExtractor extends TaxonXExtractor{
81

    
82
    // Key string "publicationYear"; not referenced in the visible part of this file.
    private static final String PUBLICATION_YEAR = "publicationYear";
83

    
84
	private static final Logger logger = Logger.getLogger(TaxonXTreatmentExtractor.class);
85

    
86
    // Feature name passed to extractSpecificFeatureNotStructured for nodes no specific handler matches.
    private static final String notMarkedUp = "Not marked-up";
87
    // UUID of the feature tree that buildFeatureTree looks up and, if missing, creates.
    private static final UUID proIbioTreeUUID = UUID.fromString("2c49f506-c7f7-44de-a8b9-2e695de3769c");
88
    // NOTE(review): OtherUUID and NotMarkedUpUUID are not used in the visible part of this file -
    // presumably the UUIDs of the "Other" and "Not marked-up" features; confirm against the callers.
    private static final UUID OtherUUID = UUID.fromString("6465f8aa-2175-446f-807e-7163994b120f");
89
    private static final UUID NotMarkedUpUUID = UUID.fromString("796fe3a5-2c9c-4a89-b298-7598ca944063");
90
    // Not read in the visible part of this file.
    private static final boolean skippQuestion = true;
91

    
92
    // Set once in the constructor.
    private final NomenclaturalCode nomenclaturalCode;
93
    // Classification being filled; replaced by the persisted instance in reloadClassification.
    private Classification classification;
94

    
95
    private  String treatmentMainName,originalTreatmentName;
96

    
97
    private final HashMap<String,Map<String,String>> namesMap = new HashMap<>();
98

    
99

    
100
    // keypattern and keypatternend are only referenced by the commented-out extractKey method
    // in the visible part of this file.
    private final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
101
    private final Pattern keypatternend = Pattern.compile("^.+?\\d$");
102

    
103
    // Gate flag: extractTreatment only saves names and classification, and handleSingleNode only
    // runs the specific node handlers, while this is true. It is set outside the visible part of this file.
    private boolean maxRankRespected =false;
104
    // Features used so far; buildFeatureTree keys entries by the feature's title cache and adds
    // all values as children of the feature tree root.
    private Map<String, Feature> featuresMap;
105

    
106
    // Name currently being processed; replaced in getTaxonNameFromXML.
    private MyName currentMyName;
107

    
108
    // Reference passed to the constructor as urlSource and handed on to sourceHandler.
    private Reference sourceUrlRef;
109

    
110
    private String followingText;  //text element immediately following a tax:name in tax:nomenclature TODO move to state
111
    private String usedFollowingTextPrefix; //the part of the following text which has been used during taxon name creation
112

    
113
    // Helper used to attach sources; configured in the constructor.
    private final TaxonXAddSources sourceHandler = new TaxonXAddSources();
114

    
115
    /**
     * Creates an extractor for one import run.
     * <p>
     * The arguments are stored, the collectors are prepared through
     * {@code prepareCollectors} and the internal source handler receives the
     * source URL reference, the importer and the import state.
     *
     * @param nomenclaturalCode the nomenclatural code kept by this extractor
     * @param classification the classification kept (and later reloaded) by this extractor
     * @param importer the importer that gives access to the services
     * @param configState the import state
     * @param featuresMap the map of features handed to this extractor
     * @param urlSource the reference kept as source URL reference
     */
    public TaxonXTreatmentExtractor(
            NomenclaturalCode nomenclaturalCode,
            Classification classification,
            TaxonXImport importer,
            TaxonXImportState configState,
            Map<String, Feature> featuresMap,
            Reference urlSource) {

        // state of this extractor (importer and state2 are not declared in the visible part of this file)
        this.nomenclaturalCode = nomenclaturalCode;
        this.classification = classification;
        this.importer = importer;
        this.state2 = configState;
        this.featuresMap = featuresMap;
        this.sourceUrlRef = urlSource;

        prepareCollectors(configState, importer.getAgentService());

        // wire the helper that attaches sources to the created objects
        sourceHandler.setSourceUrlRef(this.sourceUrlRef);
        sourceHandler.setImporter(importer);
        sourceHandler.setConfigState(configState);
    }
134

    
135
    /**
136
     * extracts all the treament information and save them
137
     * @param treatmentnode: the XML Node
138
     * @param tosave: the list of object to save into the CDM
139
     * @param refMods: the reference extracted from the MODS
140
     * @param sourceName: the URI of the document
141
     */
142
    @SuppressWarnings({ "rawtypes", "unused" })
143

    
144
    protected void extractTreatment(Node treatmentnode, Reference refMods, URI sourceName) {        logger.info("extractTreatment");
145
        List<TaxonName> namesToSave = new ArrayList<>();
146
        NodeList children = treatmentnode.getChildNodes();
147
        Taxon acceptedTaxon =null;
148
        boolean hasRefgroup=false;
149

    
150
        //needed?
151
        for (int i=0;i<children.getLength();i++){
152
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group")) {
153
            	hasRefgroup=true;
154
            }
155
        }
156

    
157
        for (int i=0;i<children.getLength();i++){
158
        	Node child = children.item(i);
159
    		acceptedTaxon = handleSingleNode(refMods, sourceName, namesToSave, child, acceptedTaxon);
160
        }
161
        //        logger.info("saveUpdateNames");
162
        if (maxRankRespected){
163
            importer.getNameService().saveOrUpdate(namesToSave);
164
            importer.getClassificationService().saveOrUpdate(classification);
165
            //logger.info("saveUpdateNames-ok");
166
        }
167

    
168
        buildFeatureTree();
169
    }
170

    
171
	private Taxon handleSingleNode(Reference refMods, URI sourceName,
172
			List<TaxonName> namesToSave, Node child, Taxon acceptedTaxon) {
173
		Taxon defaultTaxon =null;
174

    
175
		String nodeName = child.getNodeName();
176
		if (nodeName.equalsIgnoreCase("tax:nomenclature")){
177
		    NodeList nomenclatureChildren = child.getChildNodes();
178
		    boolean containsName = false;
179
		    for(int k=0; k<nomenclatureChildren.getLength(); k++){
180
		        if(nomenclatureChildren.item(k).getNodeName().equalsIgnoreCase("tax:name")){
181
		            containsName=true;
182
		            break;
183
		        }
184
		    }
185
		    if (containsName){
186
		        reloadClassification();
187
		        //extract "main" the scientific name
188
		        try{
189
		            acceptedTaxon = extractNomenclature(child, namesToSave, refMods);
190
		        }catch(ClassCastException e){
191
		        	//FIXME exception handling
192
		        	e.printStackTrace();
193
		        }
194
		        //                    System.out.println("acceptedTaxon : "+acceptedTaxon);
195
		    }
196
		}else if (nodeName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
197
		    reloadClassification();
198
		    //extract the References within the document
199
		    extractReferences(child, namesToSave ,acceptedTaxon,refMods);
200
		}else if (nodeName.equalsIgnoreCase("tax:div") &&
201
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected){
202
		    File file = new File(TaxonXImport.LOG_FOLDER + "multipleTaxonX.txt");
203
		    FileWriter writer;
204
		    try {
205
		        writer = new FileWriter(file ,true);
206
		        writer.write(sourceName+"\n");
207
		        writer.flush();
208
		        writer.close();
209
		    } catch (IOException e1) {
210
		        // TODO Auto-generated catch block
211
		        logger.error(e1.getMessage());
212
		    }
213
		    //                String multiple = askMultiple(children.item(i));
214
		    String multiple = "Other";
215
		    if (multiple.equalsIgnoreCase("other")) {
216
		        extractSpecificFeatureNotStructured(child,acceptedTaxon, defaultTaxon,namesToSave, refMods,multiple);
217
		    }else if (multiple.equalsIgnoreCase("synonyms")) {
218
		        try{
219
		            extractSynonyms(child,acceptedTaxon, refMods, null);
220
		        }catch(NullPointerException e){
221
		            logger.warn("the accepted taxon is maybe null");
222
		        }
223
		    }else if(multiple.equalsIgnoreCase("material examined")){
224
		    	extractMaterials(child, acceptedTaxon, refMods, namesToSave);
225
		    }else if (multiple.equalsIgnoreCase("distribution")){
226
		    	extractDistribution(child, acceptedTaxon, defaultTaxon, namesToSave, refMods);
227
		    }else if (multiple.equalsIgnoreCase("type status")){
228
		    	extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, "TypeStatus");
229
		    }else if (multiple.equalsIgnoreCase("vernacular name")){
230
		    	extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
231
		    }else{
232
		    	extractSpecificFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,multiple);
233
		    }
234
		}
235
		else if(nodeName.equalsIgnoreCase("tax:div") &&
236
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected){
237
		    extractFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, Feature.BIOLOGY_ECOLOGY());
238
		}
239
		else if(nodeName.equalsIgnoreCase("tax:div") &&
240
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("vernacularName") && maxRankRespected){
241
		    extractDescriptionWithReference(child, acceptedTaxon,defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
242
		}
243
		else if(nodeName.equalsIgnoreCase("tax:div") &&
244
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected){
245
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
246
		}
247
		else if(nodeName.equalsIgnoreCase("tax:div") &&
248
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected){
249
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,Feature.DIAGNOSIS());
250
		}
251
		else if(nodeName.equalsIgnoreCase("tax:div") &&
252
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected){
253
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DISCUSSION());
254
		}
255
		else if(nodeName.equalsIgnoreCase("tax:div") &&
256
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("note") && maxRankRespected){
257
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
258
		}
259
		else if(nodeName.equalsIgnoreCase("tax:div") &&
260
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected){
261
		    extractDistribution(child,acceptedTaxon,defaultTaxon,namesToSave, refMods);
262
		}
263
		else if(nodeName.equalsIgnoreCase("tax:div") &&
264
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected){
265
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave,refMods,Feature.ETYMOLOGY());
266
		}
267
		else if(nodeName.equalsIgnoreCase("tax:div") &&
268
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected){
269
		    extractMaterials(child,acceptedTaxon, refMods, namesToSave);
270
		}
271
		else if(nodeName.equalsIgnoreCase("tax:figure") && maxRankRespected){
272
		    extractSpecificFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, "Figure");
273
		}
274
		else if(nodeName.equalsIgnoreCase("tax:div") &&
275
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") && maxRankRespected){
276
		    extractSpecificFeature(child, acceptedTaxon,defaultTaxon, namesToSave, refMods, "table");
277
		}else if(nodeName.equalsIgnoreCase("tax:div") &&
278
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected){
279
		    //TODO IGNORE keys for the moment
280
		    //extractKey(children.item(i),acceptedTaxon, nameToSave,source, refMods);
281
		    extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,"Keys - unparsed");
282
		}
283
		else{
284
		    if (! nodeName.equalsIgnoreCase("tax:pb")){
285
		        //logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
286
		        if (child.getAttributes() !=null) {
287
		            logger.info("First Attribute: " + child.getAttributes().item(0));
288
		        }
289
		        extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, notMarkedUp);
290
		    }else{
291
		    	//FIXME
292
		    	logger.warn("Unhandled");
293
		    }
294
		}
295
		return acceptedTaxon;
296
	}
297

    
298

    
299
    protected Map<String,Feature> getFeaturesUsed(){
300
        return featuresMap;
301
    }
302
    /**
303
     *
304
     */
305
    private void buildFeatureTree() {
306
        logger.info("buildFeatureTree");
307
        TermTree proibiospheretree = importer.getFeatureTreeService().find(proIbioTreeUUID);
308
        if (proibiospheretree == null){
309
            List<TermTree> trees = importer.getFeatureTreeService().list(TermTree.class, null, null, null, null);
310
            if (trees.size()==1) {
311
                TermTree<Feature> ft = trees.get(0);
312
                if (featuresMap==null) {
313
                    featuresMap=new HashMap<>();
314
                }
315
                for (Feature feature: ft.getDistinctTerms()){
316
                    if(feature!=null) {
317
                        featuresMap.put(feature.getTitleCache(), feature);
318
                    }
319
                }
320
            }
321
            proibiospheretree = TermTree.NewFeatureInstance();
322
            proibiospheretree.setUuid(proIbioTreeUUID);
323
        }
324

    
325
        TermNode root2 = proibiospheretree.getRoot();
326
        if (root2 != null){
327
            int nbChildren = root2.getChildCount()-1;
328
            while (nbChildren>-1){
329
                try{
330
                    root2.removeChild(nbChildren);
331
                }catch(Exception e){logger.warn("Can't remove child from FeatureTree "+e);}
332
                nbChildren --;
333
            }
334

    
335
        }
336

    
337
        for (Feature feature:featuresMap.values()) {
338
            root2.addChild(feature);
339
        }
340
        importer.getFeatureTreeService().saveOrUpdate(proibiospheretree);
341

    
342
    }
343

    
344

    
345
    /**
346
     * @param keys
347
     * @param acceptedTaxon: the current acceptedTaxon
348
     * @param nametosave: the list of objects to save into the CDM
349
     * @param refMods: the current reference extracted from the MODS
350
     */
351
    /*   @SuppressWarnings("rawtypes")
352
    private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonName> nametosave, Reference refMods) {
353
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
354

    
355
        NodeList children = keys.getChildNodes();
356
        String key="";
357
        PolytomousKey poly =  PolytomousKey.NewInstance();
358
        poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
359
        poly.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
360
        poly.addTaxonomicScope(acceptedTaxon);
361
        poly.setTitleCache("bloup", true);
362
        //        poly.addCoveredTaxon(acceptedTaxon);
363
        PolytomousKeyNode root = poly.getRoot();
364
        PolytomousKeyNode previous = null,tmpKey=null;
365
        Taxon taxonKey=null;
366
        List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
367

    
368
        //        String fullContent = keys.getTextContent();
369
        for (int i=0;i<children.getLength();i++){
370
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
371
                NodeList paragraph = children.item(i).getChildNodes();
372
                key="";
373
                taxonKey=null;
374
                for (int j=0;j<paragraph.getLength();j++){
375
                    if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
376
                        if (! paragraph.item(j).getTextContent().trim().isEmpty()){
377
                            key+=paragraph.item(j).getTextContent().trim();
378
                            //                            logger.info("KEY: "+j+"--"+key);
379
                        }
380
                    }
381
                    if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
382
                        taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
383
                    }
384
                }
385
                //                logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
386
                if (keypattern.matcher(key).matches()){
387
                    tmpKey = PolytomousKeyNode.NewInstance(key);
388
                    if (taxonKey!=null) {
389
                        tmpKey.setTaxon(taxonKey);
390
                    }
391
                    polyNodes.add(tmpKey);
392
                    if (previous == null) {
393
                        root.addChild(tmpKey);
394
                    } else {
395
                        previous.addChild(tmpKey);
396
                    }
397
                }else{
398
                    if (!key.isEmpty()){
399
                        tmpKey=PolytomousKeyNode.NewInstance(key);
400
                        if (taxonKey!=null) {
401
                            tmpKey.setTaxon(taxonKey);
402
                        }
403
                        polyNodes.add(tmpKey);
404
                        if (keypatternend.matcher(key).matches()) {
405
                            root.addChild(tmpKey);
406
                            previous=tmpKey;
407
                        } else{
408
                            previous.addChild(tmpKey);
409
                        }
410

    
411
                    }
412
                }
413
            }
414
        }
415
        importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
416
        importer.getPolytomousKeyService().saveOrUpdate(poly);
417
    }
418
*/
419

    
420

    
421
    /**
422
     * @param taxons: the XML Nodegroup
423
     * @param nametosave: the list of objects to save into the CDM
424
     * @param acceptedTaxon: the current accepted Taxon
425
     * @param refMods: the current reference extracted from the MODS
426
     *
427
     * @return Taxon object built
428
     */
429
    @SuppressWarnings({ "rawtypes", "unused" })
430
    private TaxonName getTaxonNameFromXML(Node taxons, List<TaxonName> nametosave, Reference refMods, boolean isSynonym) {
431
        //        logger.info("getTaxonFromXML");
432
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
433
        logger.info("getTaxonNameFromXML");
434
        TaxonName nameToBeFilled = null;
435

    
436
        currentMyName=new MyName(isSynonym);
437

    
438
        NomenclaturalStatusType statusType = null;
439
        try {
440
        	String followingText = null;  //needs to be checked if following text is possible
441
            currentMyName = extractScientificName(taxons,refMods, null);
442
        } catch (TransformerFactoryConfigurationError e1) {
443
            logger.warn(e1);
444
        } catch (TransformerException e1) {
445
            logger.warn(e1);
446
        }
447
        /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
448

    
449
        nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
450
        if (nameToBeFilled.hasProblem() &&
451
                !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
452
            //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
453
            addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
454
            nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
455
        }
456

    
457
        nameToBeFilled = getTaxonName(nameToBeFilled,nametosave,statusType);
458
         */
459
        nameToBeFilled = currentMyName.getTaxonName();
460
        return nameToBeFilled;
461

    
462
    }
463

    
464

    
465
    /**
466
     *
467
     */
468
    private void reloadClassification() {
469
        logger.info("reloadClassification");
470
        Classification cl = importer.getClassificationService().find(classification.getUuid());
471
        if (cl != null){
472
            classification = cl;
473
        }else{
474
            importer.getClassificationService().saveOrUpdate(classification);
475
            classification = importer.getClassificationService().find(classification.getUuid());
476
        }
477
    }
478

    
479
    //    /**
480
    //     * Create a Taxon for the current NameBase, based on the current reference
481
    //     * @param taxonName
482
    //     * @param refMods: the current reference extracted from the MODS
483
    //     * @return Taxon
484
    //     */
485
    //    @SuppressWarnings({ "unused", "rawtypes" })
486
    //    private Taxon getTaxon(TaxonName taxonName, Reference refMods) {
487
    //        Taxon t = new Taxon(taxonName,null );
488
    //        if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
489
    //            t.setSec(configState.getConfig().getSecundum());
490
    //            logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
491
    //        }
492
    //        /*<<<<<<< .courant
493
    //        boolean sourceExists=false;
494
    //        Set<IdentifiableSource> sources = t.getSources();
495
    //        for (IdentifiableSource src : sources){
496
    //            String micro = src.getCitationMicroReference();
497
    //            Reference r = src.getCitation();
498
    //            if (r.equals(refMods) && micro == null) {
499
    //                sourceExists=true;
500
    //            }
501
    //        }
502
    //        if(!sourceExists) {
503
    //            t.addSource(null,null,refMods,null);
504
    //        }
505
    //=======*/
506
    //        t.addSource(OriginalSourceType.Import,null,null,refMods,null);
507
    //        t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
508
    //        return t;
509
    //    }
510

    
511
    private void  extractDescriptionWithReference(Node typestatus, Taxon acceptedTaxon, Taxon defaultTaxon, Reference refMods,
512
            String featureName) {
513
        //        System.out.println("extractDescriptionWithReference !");
514
        logger.info("extractDescriptionWithReference");
515
        NodeList children = typestatus.getChildNodes();
516

    
517
        Feature currentFeature=getFeatureObjectFromString(featureName);
518

    
519
        String r="";String s="";
520
        for (int i=0;i<children.getLength();i++){
521
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
522
                s+=children.item(i).getTextContent().trim();
523
            }
524
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
525
                r+= children.item(i).getTextContent().trim();
526
            }
527
            if (s.indexOf(r)>-1) {
528
                s=s.split(r)[0];
529
            }
530
        }
531

    
532
        Reference currentref =  ReferenceFactory.newGeneric();
533
        if(!r.isEmpty()) {
534
            currentref.setTitleCache(r, true);
535
        } else {
536
            currentref=refMods;
537
        }
538
        setParticularDescription(s,acceptedTaxon,defaultTaxon, currentref, refMods,currentFeature);
539
    }
540

    
541
    /**
542
     * @param nametosave
543
     * @param distribution: the XML node group
544
     * @param acceptedTaxon: the current accepted Taxon
545
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
546
     * @param refMods: the current reference extracted from the MODS
547
     */
548
    @SuppressWarnings("rawtypes")
549
    private void extractDistribution(Node distribution, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonName> nametosave, Reference refMods) {
550
        logger.info("extractDistribution");
551
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
552
        NodeList children = distribution.getChildNodes();
553
        Map<Integer,List<MySpecimenOrObservation>> specimenOrObservations = new HashMap<>();
554
        Map<Integer,String> descriptionsFulltext = new HashMap<>();
555

    
556
        for (int i=0;i<children.getLength();i++){
557
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
558
                NodeList paragraph = children.item(i).getChildNodes();
559
                for (int j=0;j<paragraph.getLength();j++){
560
                    if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
561
                        extractText(descriptionsFulltext, i, paragraph.item(j));
562
                    }
563
                    else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
564
                        extractInLine(nametosave, refMods, descriptionsFulltext, i,paragraph.item(j));
565
                    }
566
                    else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")){
567
                        MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
568
                        DerivedUnit derivedUnitBase = null;
569
                        specimenOrObservation = extractSpecimenOrObservation(paragraph.item(j), derivedUnitBase, SpecimenOrObservationType.DerivedUnit, null);
570
                        extractTextFromSpecimenOrObservation(specimenOrObservations, descriptionsFulltext, i, specimenOrObservation);
571
                    }
572
                }
573
            }
574
        }
575

    
576
        int m=0;
577
        for (int k:descriptionsFulltext.keySet()) {
578
            if (k>m) {
579
                m=k;
580
            }
581
        }
582
        for (int k:specimenOrObservations.keySet()) {
583
            if (k>m) {
584
                m=k;
585
            }
586
        }
587

    
588

    
589
        if(acceptedTaxon!=null){
590
            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
591
            Feature currentFeature = Feature.DISTRIBUTION();
592
            //        DerivedUnit derivedUnitBase=null;
593
            //        String descr="";
594
            for (int k=0;k<=m;k++){
595
                if(specimenOrObservations.keySet().contains(k)){
596
                    for (MySpecimenOrObservation soo:specimenOrObservations.get(k) ) {
597
                        handleAssociation(acceptedTaxon, refMods, td, soo);
598
                    }
599
                }
600

    
601
                if (descriptionsFulltext.keySet().contains(k)){
602
                    if (!stringIsEmpty(descriptionsFulltext.get(k).trim()) && (descriptionsFulltext.get(k).startsWith("Hab.") || descriptionsFulltext.get(k).startsWith("Habitat"))){
603
                        setParticularDescription(descriptionsFulltext.get(k),acceptedTaxon,defaultTaxon, refMods, Feature.HABITAT());
604
                        break;
605
                    }
606
                    else{
607
                        handleTextData(refMods, descriptionsFulltext, td, currentFeature, k);
608
                    }
609
                }
610

    
611
                if (descriptionsFulltext.keySet().contains(k) || specimenOrObservations.keySet().contains(k)){
612
                    acceptedTaxon.addDescription(td);
613
                    sourceHandler.addAndSaveSource(refMods, td, null);
614
                    importer.getTaxonService().saveOrUpdate(acceptedTaxon);
615
                }
616
            }
617
        }
618
    }
619

    
620
    /**
621
     * @param refMods
622
     * @param descriptionsFulltext
623
     * @param td
624
     * @param currentFeature
625
     * @param k
626
     */
627
    private void handleTextData(Reference refMods, Map<Integer, String> descriptionsFulltext, TaxonDescription td,
628
            Feature currentFeature, int k) {
629
        //logger.info("handleTextData");
630
        TextData textData = TextData.NewInstance();
631
        textData.setFeature(currentFeature);
632
        textData.putText(Language.UNKNOWN_LANGUAGE(), descriptionsFulltext.get(k));
633
        sourceHandler.addSource(refMods, textData);
634
        td.addElement(textData);
635
    }
636

    
637
    /**
638
     * @param acceptedTaxon
639
     * @param refMods
640
     * @param td
641
     * @param soo
642
     */
643
    private void handleAssociation(Taxon acceptedTaxon, Reference refMods, TaxonDescription td, MySpecimenOrObservation soo) {
644
        logger.info("handleAssociation");
645
        String descr=soo.getDescr();
646
        DerivedUnit derivedUnitBase = soo.getDerivedUnitBase();
647

    
648
        sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
649

    
650
        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
651

    
652
        Feature feature=null;
653
        feature = makeFeature(derivedUnitBase);
654
        if(!StringUtils.isEmpty(descr)) {
655
            derivedUnitBase.setTitleCache(descr, true);
656
        }
657

    
658
        IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
659

    
660
        taxonDescription.addElement(indAssociation);
661
        sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
662
        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
663
        td.setDescribedSpecimenOrObservation(soo.getDerivedUnitBase());
664
    }
665

    
666
    /**
667
     * create an individualAssociation
668
     * @param refMods
669
     * @param derivedUnitBase
670
     * @param feature
671
     * @return
672
     */
673
    private IndividualsAssociation createIndividualAssociation(Reference refMods, DerivedUnit derivedUnitBase,
674
            Feature feature) {
675
        logger.info("createIndividualAssociation");
676
        IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
677
        indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
678
        indAssociation.setFeature(feature);
679
        indAssociation = sourceHandler.addSource(refMods, indAssociation);
680
        return indAssociation;
681
    }
682

    
683
    /**
684
     * @param specimenOrObservations
685
     * @param descriptionsFulltext
686
     * @param i
687
     * @param specimenOrObservation
688
     */
689
    private void extractTextFromSpecimenOrObservation(Map<Integer, List<MySpecimenOrObservation>> specimenOrObservations,
690
            Map<Integer, String> descriptionsFulltext, int i, MySpecimenOrObservation specimenOrObservation) {
691
        logger.info("extractTextFromSpecimenOrObservation");
692
        List<MySpecimenOrObservation> speObsList = specimenOrObservations.get(i);
693
        if (speObsList == null) {
694
            speObsList=new ArrayList<MySpecimenOrObservation>();
695
        }
696
        speObsList.add(specimenOrObservation);
697
        specimenOrObservations.put(i,speObsList);
698

    
699
        String s = specimenOrObservation.getDerivedUnitBase().toString();
700
        if (descriptionsFulltext.get(i) !=null){
701
            s = descriptionsFulltext.get(i)+" "+s;
702
        }
703
        descriptionsFulltext.put(i, s);
704
    }
705

    
706
    /**
707
     * Extract the text with the inline link to a taxon
708
     * @param nametosave
709
     * @param refMods
710
     * @param descriptionsFulltext
711
     * @param i
712
     * @param paragraph
713
     */
714
    @SuppressWarnings("rawtypes")
715
    private void extractInLine(List<TaxonName> nametosave, Reference refMods, Map<Integer, String> descriptionsFulltext,
716
            int i, Node paragraph) {
717
        //logger.info("extractInLine");
718
        String inLine=getInlineTextForName(nametosave, refMods, paragraph);
719
        if (descriptionsFulltext.get(i) !=null){
720
            inLine = descriptionsFulltext.get(i)+inLine;
721
        }
722
        descriptionsFulltext.put(i, inLine);
723
    }
724

    
725
    /**
726
     * Extract the raw text from a Node
727
     * @param descriptionsFulltext
728
     * @param node
729
     * @param j
730
     */
731
    private void extractText(Map<Integer, String> descriptionsFulltext, int i, Node node) {
732
        //logger.info("extractText");
733
        if(!node.getTextContent().trim().isEmpty()) {
734
            String s =node.getTextContent().trim();
735
            if (descriptionsFulltext.get(i) !=null){
736
                s = descriptionsFulltext.get(i)+" "+s;
737
            }
738
            descriptionsFulltext.put(i, s);
739
        }
740
    }
741

    
742

    
743
    /**
744
     * @param materials: the XML node group
745
     * @param acceptedTaxon: the current accepted Taxon
746
     * @param refMods: the current reference extracted from the MODS
747
     */
748
    @SuppressWarnings("rawtypes")
749
    private void extractMaterials(Node materials, Taxon acceptedTaxon, Reference refMods,List<TaxonName> nametosave) {
750
        logger.info("EXTRACTMATERIALS");
751
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
752
        NodeList children = materials.getChildNodes();
753
        NodeList events = null;
754
        //        String descr="";
755

    
756

    
757
        for (int i=0;i<children.getLength();i++){
758
            String rawAssociation="";
759
            boolean added=false;
760
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
761
                events = children.item(i).getChildNodes();
762
                for(int k=0;k<events.getLength();k++){
763
                    if (events.item(k).getNodeName().equalsIgnoreCase("tax:name")){
764
                        String inLine= getInlineTextForName(nametosave, refMods, events.item(k));
765
                        if(!inLine.isEmpty()) {
766
                            rawAssociation+=inLine;
767
                        }
768
                    }
769
                    if (! events.item(k).getNodeName().equalsIgnoreCase("tax:name")
770
                            && !events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
771
                        rawAssociation+= events.item(k).getTextContent().trim();
772
                    }
773
                    if(events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
774
                        if (!containsDistinctLetters(rawAssociation.replaceAll(";",""))) {
775
                            rawAssociation="no description text";
776
                        }
777
                        added=true;
778
                        handleDerivedUnitFacadeAndBase(acceptedTaxon, refMods, events.item(k), rawAssociation);
779
                    }
780
                    if (!rawAssociation.isEmpty() && !added){
781

    
782
                        Feature feature = Feature.MATERIALS_EXAMINED();
783
                        featuresMap.put(feature.getTitleCache(),feature);
784

    
785
                        TextData textData = createTextData(rawAssociation, refMods, feature);
786

    
787
                        if(! rawAssociation.isEmpty() && (acceptedTaxon!=null)){
788
                            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
789
                            td.addElement(textData);
790
                            acceptedTaxon.addDescription(td);
791
                            sourceHandler.addAndSaveSource(refMods, td, null);
792
                        }
793
                        //                        DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
794
                        //                        derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
795
                        //
796
                        //                        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
797
                        //                        acceptedTaxon.addDescription(taxonDescription);
798
                        //
799
                        //                        IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
800
                        //
801
                        //                        Feature feature = Feature.MATERIALS_EXAMINED();
802
                        //                        featuresMap.put(feature.getTitleCache(),feature);
803
                        //                        if(!StringUtils.isEmpty(rawAssociation)) {
804
                        //                            derivedUnitBase.setTitleCache(rawAssociation, true);
805
                        //                        }
806
                        //                        indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
807
                        //                        indAssociation.setFeature(feature);
808
                        //                        indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
809
                        //
810
                        //                        /*boolean sourceExists=false;
811
                        //                        Set<DescriptionElementSource> dsources = indAssociation.getSources();
812
                        //                        for (DescriptionElementSource src : dsources){
813
                        //                            String micro = src.getCitationMicroReference();
814
                        //                            Reference r = src.getCitation();
815
                        //                            if (r.equals(refMods) && micro == null) {
816
                        //                                sourceExists=true;
817
                        //                            }
818
                        //                        }
819
                        //                        if(!sourceExists) {
820
                        //                            indAssociation.addSource(null, null, refMods, null);
821
                        //                        }*/
822
                        //                        taxonDescription.addElement(indAssociation);
823
                        //                        taxonDescription.setTaxon(acceptedTaxon);
824
                        //                        taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
825
                        //
826
                        //                        /*sourceExists=false;
827
                        //                        Set<IdentifiableSource> sources = taxonDescription.getSources();
828
                        //                        for (IdentifiableSource src : sources){
829
                        //                            String micro = src.getCitationMicroReference();
830
                        //                            Reference r = src.getCitation();
831
                        //                            if (r.equals(refMods) && micro == null) {
832
                        //                                sourceExists=true;
833
                        //                            }
834
                        //                        }
835
                        //                        if(!sourceExists) {
836
                        //                            taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
837
                        //                        }*/
838
                        //
839
                        //                        importer.getDescriptionService().saveOrUpdate(taxonDescription);
840
                        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
841

    
842
                        rawAssociation="";
843
                    }
844
                }
845
            }
846
        }
847
    }
848

    
849
    /**
850
     * @param acceptedTaxon
851
     * @param refMods
852
     * @param events
853
     * @param rawAssociation
854
     * @param k
855
     */
856
    private void handleDerivedUnitFacadeAndBase(Taxon acceptedTaxon, Reference refMods, Node event,
857
            String rawAssociation) {
858
        logger.info("handleDerivedUnitFacadeAndBase");
859
        String descr;
860
        DerivedUnit derivedUnitBase;
861
        MySpecimenOrObservation myspecimenOrObservation;
862
        DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
863
        derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
864

    
865
        sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
866

    
867
        //TODO this may not always be correct, ask user
868
        TaxonName typifiableName = acceptedTaxon != null ?  acceptedTaxon.getName() : null;
869
        myspecimenOrObservation = extractSpecimenOrObservation(event,derivedUnitBase,SpecimenOrObservationType.DerivedUnit, typifiableName);
870
        derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
871
        descr=myspecimenOrObservation.getDescr();
872

    
873
        sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
874

    
875
        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
876

    
877
        Feature feature = makeFeature(derivedUnitBase);
878
        featuresMap.put(feature.getTitleCache(),feature);
879
        if(!StringUtils.isEmpty(descr)) {
880
            derivedUnitBase.setTitleCache(descr, true);
881
        }
882

    
883
        IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
884

    
885
        taxonDescription.addElement(indAssociation);
886
        sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
887
        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
888
    }
889

    
890

    
891

    
892
    /**
893
     * @param currentName
894
     * @param materials: the XML node group
895
     * @param acceptedTaxon: the current accepted Taxon
896
     * @param refMods: the current reference extracted from the MODS
897
     */
898
    private String extractMaterialsDirect(Node materials, Taxon acceptedTaxon, Reference refMods, String event, TaxonName currentName) {
899
        logger.info("extractMaterialsDirect");
900
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
901
        String descr="";
902

    
903
        DerivedUnit derivedUnitBase=null;
904
        MySpecimenOrObservation myspecimenOrObservation = extractSpecimenOrObservation(materials,derivedUnitBase, SpecimenOrObservationType.DerivedUnit, currentName);
905
        derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
906

    
907
        sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
908

    
909
        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
910

    
911
        Feature feature=null;
912
        if (event.equalsIgnoreCase("collection")){
913
            feature = makeFeature(derivedUnitBase);
914
        }
915
        else{
916
            feature = Feature.MATERIALS_EXAMINED();
917
        }
918
        featuresMap.put(feature.getTitleCache(),  feature);
919

    
920
        descr=myspecimenOrObservation.getDescr();
921
        if(!StringUtils.isEmpty(descr)) {
922
            derivedUnitBase.setTitleCache(descr, true);
923
        }
924

    
925
        IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
926

    
927
        taxonDescription.addElement(indAssociation);
928
        sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
929
        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
930

    
931
        return derivedUnitBase.getTitleCache();
932

    
933
    }
934

    
935

    
936
    /**
937
     * @param description: the XML node group
938
     * @param acceptedTaxon: the current acceptedTaxon
939
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
940
     * @param nametosave: the list of objects to save into the CDM
941
     * @param refMods: the current reference extracted from the MODS
942
     * @param featureName: the feature name
943
     */
944
    @SuppressWarnings({ "rawtypes"})
945
    private String extractSpecificFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
946
            List<TaxonName> nametosave, Reference refMods, String featureName ) {
947
        logger.info("extractSpecificFeature "+featureName);
948
        //        System.out.println("GRUUUUuu");
949
        NodeList children = description.getChildNodes();
950
        NodeList insideNodes ;
951
        NodeList trNodes;
952
        //        String descr ="";
953
        String localdescr="";
954
        List<String> blabla=null;
955
        List<String> text = new ArrayList<String>();
956

    
957
        String table="<table>";
958
        String head="";
959
        String line="";
960

    
961
        Feature currentFeature=getFeatureObjectFromString(featureName);
962

    
963
        //        String fullContent = description.getTextContent();
964
        for (int i=0;i<children.getLength();i++){
965
            //            localdescr="";
966
            if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
967
                text.add(children.item(i).getTextContent().trim());
968
            }
969
            if (featureName.equalsIgnoreCase("table")){
970
                if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
971
                        children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("thead")){
972
                    head = extractTableHead(children.item(i));
973
                    table+=head;
974
                    line = extractTableLine(children.item(i));
975
                    if (!line.equalsIgnoreCase("<tr></tr>")) {
976
                        table+=line;
977
                    }
978
                }
979
                if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
980
                        children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
981
                    line = extractTableLineWithColumn(children.item(i).getChildNodes());
982
                    if(!line.equalsIgnoreCase("<tr></tr>")) {
983
                        table+=line;
984
                    }
985
                }
986
            }
987
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
988
                insideNodes=children.item(i).getChildNodes();
989
                blabla= new ArrayList<String>();
990
                for (int j=0;j<insideNodes.getLength();j++){
991
                    Node insideNode = insideNodes.item(j);
992
                	if (insideNode.getNodeName().equalsIgnoreCase("tax:name")){
993
                        String inlinetext = getInlineTextForName(nametosave, refMods, insideNode);
994
                        if (!inlinetext.isEmpty()) {
995
                            blabla.add(inlinetext);
996
                        }
997
                    }
998
                    else if (insideNode.getNodeName().equalsIgnoreCase("#text")) {
999
                        if(!insideNode.getTextContent().trim().isEmpty()){
1000
                            blabla.add(insideNode.getTextContent().trim());
1001
                            //                            localdescr += insideNodes.item(j).getTextContent().trim();
1002
                        }
1003
                    }
1004
                }
1005
                if (!blabla.isEmpty()) {
1006
                    String blaStr = StringUtils.join(blabla," ").trim();
1007
                    if(!stringIsEmpty(blaStr)) {
1008
                        setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1009
                        text.add(blaStr);
1010
                    }
1011
                }
1012

    
1013
            }
1014
            if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1015
                if(!children.item(i).getTextContent().trim().isEmpty()){
1016
                    localdescr = children.item(i).getTextContent().trim();
1017
                    if(!stringIsEmpty(localdescr)) {
1018
                        setParticularDescription(localdescr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1019
                    }
1020
                }
1021
            }
1022
        }
1023

    
1024
        table+="</table>";
1025
        if (!table.equalsIgnoreCase("<table></table>")){
1026
            //            System.out.println("TABLE : "+table);
1027
            text.add(table);
1028
        }
1029

    
1030
        if (text !=null && !text.isEmpty()) {
1031
            return StringUtils.join(text," ");
1032
        } else {
1033
            return "";
1034
        }
1035

    
1036
    }
1037

    
1038
    /**
1039
     * @param children
1040
     * @param i
1041
     * @return
1042
     */
1043
    private String extractTableLine(Node child) {
1044
        //logger.info("extractTableLine");
1045
        String line;
1046
        line="<tr>";
1047
        if (child.getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1048
            line = extractTableLineWithColumn(child.getChildNodes());
1049
        }
1050
        line+="</tr>";
1051
        return line;
1052
    }
1053

    
1054
    /**
1055
     * @param children
1056
     * @param i
1057
     * @return
1058
     */
1059
    private String extractTableHead(Node child) {
1060
        //logger.info("extractTableHead");
1061
        String head;
1062
        String line;
1063
        head="<th>";
1064
        NodeList trNodes = child.getChildNodes();
1065
        for (int k=0;k<trNodes.getLength();k++){
1066
            if (trNodes.item(k).getNodeName().equalsIgnoreCase("tax:div")
1067
                    && trNodes.item(k).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1068
                line = extractTableLineWithColumn(trNodes.item(k).getChildNodes());
1069
                head+=line;
1070
            }
1071
        }
1072
        head+="</th>";
1073
        return head;
1074
    }
1075

    
1076
    /**
1077
     * build a html table line, with td columns
1078
     * @param tdNodes
1079
     * @return an html coded line
1080
     */
1081
    private String extractTableLineWithColumn(NodeList tdNodes) {
1082
        //logger.info("extractTableLineWithColumn");
1083
        String line;
1084
        line="<tr>";
1085
        for (int l=0;l<tdNodes.getLength();l++){
1086
            if (tdNodes.item(l).getNodeName().equalsIgnoreCase("tax:p")){
1087
                line+="<td>"+tdNodes.item(l).getTextContent()+"</td>";
1088
            }
1089
        }
1090
        line+="</tr>";
1091
        return line;
1092
    }
1093

    
1094
    /**
1095
     * @param description: the XML node group
1096
     * @param acceptedTaxon: the current acceptedTaxon
1097
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1098
     * @param nametosave: the list of objects to save into the CDM
1099
     * @param refMods: the current reference extracted from the MODS
1100
     * @param featureName: the feature name
1101
     */
1102
    @SuppressWarnings({ "unused"})
1103
    private String extractSpecificFeatureNotStructured(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
1104
            List<TaxonName> nameToSave, Reference refMods, String featureName ) {
1105
        logger.info("extractSpecificFeatureNotStructured " + featureName);
1106
        NodeList children = description.getChildNodes();
1107
        NodeList insideNodes ;
1108
        List<String> blabla= new ArrayList<String>();
1109

    
1110

    
1111
        Feature currentFeature = getFeatureObjectFromString(featureName);
1112

    
1113
        String fullContent = description.getTextContent();
1114
        for (int i=0;i<children.getLength();i++){
1115
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1116
                insideNodes=children.item(i).getChildNodes();
1117
                for (int j=0;j<insideNodes.getLength();j++){
1118
                    if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1119
                        String inlineText =getInlineTextForName(nameToSave, refMods, insideNodes.item(j));
1120
                        if(!inlineText.isEmpty()) {
1121
                            blabla.add(inlineText);
1122
                        }
1123
                    }
1124
                    if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1125
                        if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1126
                            blabla.add(insideNodes.item(j).getTextContent().trim());
1127
                        }
1128
                    }
1129
                }
1130
            }
1131
            if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1132
                if(!children.item(i).getTextContent().trim().isEmpty()){
1133
                    String localdescr = children.item(i).getTextContent().trim();
1134
                    if(!localdescr.isEmpty())
1135
                    {
1136
                        blabla.add(localdescr);
1137
                    }
1138
                }
1139
            }
1140
        }
1141

    
1142
        if (blabla !=null && !blabla.isEmpty()) {
1143
            String blaStr = StringUtils.join(blabla," ").trim();
1144
            if (! stringIsEmpty(blaStr)) {
1145
                setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1146
                return blaStr;
1147
            } else {
1148
                return "";
1149
            }
1150
        } else {
1151
            return "";
1152
        }
1153

    
1154
    }
1155

    
1156
    /**
1157
     * @param blaStr
1158
     * @return
1159
     */
1160
    private boolean stringIsEmpty(String blaStr) {
1161
        if (blaStr.matches("(\\.|,|;|\\.-)?")){
1162
        	return true;
1163
        }else{
1164
        	return false;
1165
        }
1166
    }
1167

    
1168
    /**
1169
     * @param nametosave
1170
     * @param refMods
1171
     * @param insideNodes
1172
     */
1173
    private String getInlineTextForName(List<TaxonName> nametosave, Reference refMods, Node insideNode) {
1174
        if (true){
1175
        	NodeList children = insideNode.getChildNodes();
1176
        	String result = "";
1177
            for (int i=0;i<children.getLength();i++){
1178
            	Node nameChild = children.item(i);
1179
                if(nameChild.getNodeName().equalsIgnoreCase("#text")){
1180
                	result += nameChild.getTextContent();
1181
                }else{
1182
                	//do nothing
1183
                }
1184
            }
1185
        	return result.replace("\n", "").trim();
1186
        }else{
1187
	    	TaxonName tnb = getTaxonNameFromXML(insideNode, nametosave,refMods,false);
1188
	        //                        Taxon tax = getTaxonFromTxonNameBase(tnb, refMods);
1189
	        Taxon tax = currentMyName.getTaxon();
1190
	        if(tnb !=null && tax != null){
1191
	            String linkedTaxon = tnb.getTitleCache().split("sec")[0];//TODO NOT IMPLEMENTED IN THE CDM YET
1192
	            return "<cdm:taxon uuid='"+tax.getUuid()+"'>"+linkedTaxon+"</cdm:taxon>";
1193
	        }else if (tnb != null && tax == null){
1194
	        	//TODO
1195
	        	return "<cdm:taxonName uuid='" + tnb.getUuid() +"'>" + tnb.getTitleCache().split("sec")[0]  +"</cdm:taxonName>";
1196
	        }else{
1197
	        	logger.warn("Inline text has no content yet");
1198
	        }
1199
	        return "";
1200
        }
1201
    }
1202

    
1203
    /**
1204
     * @param featureName
1205
     * @return
1206
     */
1207
    @SuppressWarnings("rawtypes")
1208
    private Feature getFeatureObjectFromString(String featureName) {
1209
        logger.info("getFeatureObjectFromString");
1210
        List<Feature> features = importer.getTermService().list(Feature.class, null,null,null,null);
1211
        Feature currentFeature=null;
1212
        for (Feature feature: features){
1213
            String tmpF = feature.getTitleCache();
1214
            if (tmpF.equalsIgnoreCase(featureName)) {
1215
                currentFeature=feature;
1216
                //                System.out.println("currentFeatureFromList "+currentFeature.getUuid());
1217
            }
1218
        }
1219
        if (currentFeature == null) {
1220
            currentFeature=Feature.NewInstance(featureName, featureName, featureName);
1221
            if(featureName.equalsIgnoreCase("Other")){
1222
                currentFeature.setUuid(OtherUUID);
1223
            }
1224
            if(featureName.equalsIgnoreCase(notMarkedUp)){
1225
                currentFeature.setUuid(NotMarkedUpUUID);
1226
            }
1227
            importer.getTermService().saveOrUpdate(currentFeature);
1228
        }
1229
        return currentFeature;
1230
    }
1231

    
1232

    
1233

    
1234

    
1235
    /**
1236
     * @param children: the XML node group
1237
     * @param nametosave: the list of objects to save into the CDM
1238
     * @param acceptedTaxon: the current acceptedTaxon
1239
     * @param refMods: the current reference extracted from the MODS
1240
     * @param fullContent :the parsed XML content
1241
     * @return a list of description (text)
1242
     */
1243
    @SuppressWarnings({ "unused", "rawtypes" })
1244
    private List<String> parseParagraph(List<TaxonName> namesToSave, Taxon acceptedTaxon, Reference refMods, Node paragraph, Feature feature){
1245
        logger.info("parseParagraph "+feature.toString());
1246
        List<String> fullDescription=  new ArrayList<String>();
1247
        //        String localdescr;
1248
        String descr="";
1249
        NodeList insideNodes ;
1250
        boolean collectionEvent = false;
1251
        List<Node>collectionEvents = new ArrayList<Node>();
1252

    
1253
        NodeList children = paragraph.getChildNodes();
1254

    
1255
        for (int i=0;i<children.getLength();i++){
1256
            //            localdescr="";
1257
            if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
1258
                descr += children.item(i).getTextContent().trim();
1259
            }
1260
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1261
                insideNodes=children.item(i).getChildNodes();
1262
                List<String> blabla= new ArrayList<String>();
1263
                for (int j=0;j<insideNodes.getLength();j++){
1264
                    boolean nodeKnown = false;
1265
                    //    System.out.println("insideNodes.item(j).getNodeName() : "+insideNodes.item(j).getNodeName());
1266
                    if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1267
                        String inlineText = getInlineTextForName(namesToSave, refMods, insideNodes.item(j));
1268
                        if (!inlineText.isEmpty()) {
1269
                            blabla.add(inlineText);
1270
                        }
1271
                        nodeKnown=true;
1272
                    }
1273
                    else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1274
                        if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1275
                            blabla.add(insideNodes.item(j).getTextContent().trim());
1276
                            // localdescr += insideNodes.item(j).getTextContent().trim();
1277
                        }
1278
                        nodeKnown=true;
1279
                    }
1280
                    else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:bibref")) {
1281
                        String ref = insideNodes.item(j).getTextContent().trim();
1282
                        if (ref.endsWith(";")  && ((ref.length())>1)) {
1283
                            ref=ref.substring(0, ref.length()-1)+".";
1284
                        }
1285
                        Reference reference = ReferenceFactory.newGeneric();
1286
                        reference.setTitleCache(ref, true);
1287
                        blabla.add(reference.getTitleCache());
1288
                        nodeKnown=true;
1289
                    }
1290
                    else if  (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:figure")){
1291
                        String figure = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "figure");
1292
                        blabla.add(figure);
1293
                    }
1294
                    else if(insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:div") &&
1295
                            insideNodes.item(j).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1296
                            insideNodes.item(j).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1297
                        String table = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1298
                        blabla.add(table);
1299
                    }
1300
                    else if  (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1301
                        //                        logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1302
                        String titlecache  = extractMaterialsDirect(insideNodes.item(j), acceptedTaxon, refMods, "collection", null);
1303
                        blabla.add(titlecache);
1304
                        collectionEvent=true;
1305
                        collectionEvents.add(insideNodes.item(j));
1306
                        nodeKnown=true;
1307
                    }else{
1308
                    	logger.warn("node not handled yet: " + insideNodes.item(j).getNodeName());
1309
                    }
1310

    
1311
                }
1312
                if (!StringUtils.isBlank(StringUtils.join(blabla," "))) {
1313
                    fullDescription.add(StringUtils.join(blabla," "));
1314
                }
1315
            }
1316
            if  (children.item(i).getNodeName().equalsIgnoreCase("tax:figure")){
1317
                String figure = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "Figure");
1318
                fullDescription.add(figure);
1319
            }
1320
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1321
                    children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1322
                    children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1323
                String table = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1324
                fullDescription.add(table);
1325
            }
1326
        }
1327

    
1328
        if( !stringIsEmpty(descr.trim())){
1329
            Feature currentFeature= getNotMarkedUpFeatureObject();
1330
            setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1331
        }
1332
        //        if (collectionEvent) {
1333
        //            logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1334
        //            for (Node coll:collectionEvents){
1335
        //                = extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
1336
        //            }
1337
        //        }
1338
        return fullDescription;
1339
    }
1340

    
1341

    
1342
    /**
1343
     * @param description: the XML node group
1344
     * @param acceptedTaxon: the current acceptedTaxon
1345
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1346
     * @param nametosave: the list of objects to save into the CDM
1347
     * @param refMods: the current reference extracted from the MODS
1348
     * @param feature: the feature to link the data with
1349
     */
1350
    @SuppressWarnings("rawtypes")
1351
    private void extractFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonName> namesToSave, Reference refMods, Feature feature){
1352
        logger.info("EXTRACT FEATURE "+feature.toString());
1353
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1354
        List<String> fullDescription= parseParagraph( namesToSave, acceptedTaxon, refMods, description,feature);
1355

    
1356
        //        System.out.println("Feature : "+feature.toString()+", "+fullDescription.toString());
1357
        if (!fullDescription.isEmpty() &&!stringIsEmpty(StringUtils.join(fullDescription,"\n").trim())) {
1358
            setParticularDescription(StringUtils.join(fullDescription,"\n").trim(),acceptedTaxon,defaultTaxon, refMods,feature);
1359
        }
1360

    
1361
    }
1362

    
1363

    
1364
    /**
1365
     * @param descr: the XML Nodegroup to parse
1366
     * @param acceptedTaxon: the current acceptedTaxon
1367
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1368
     * @param refMods: the current reference extracted from the MODS
1369
     * @param currentFeature: the feature name
1370
     * @return
1371
     */
1372
    private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon, Reference refMods, Feature currentFeature) {
1373
        logger.info("setParticularDescription " + currentFeature.getTitleCache()+", \n blabla : "+descr);
1374

    
1375
        //remove redundant feature title
1376
        String featureStr = currentFeature.getTitleCache();
1377
        if (!descr.isEmpty() && descr.toLowerCase().startsWith(featureStr.toLowerCase())){
1378
        	descr = descr.replaceAll("(?i)" + featureStr + "\\.\\s*", "");
1379
        }
1380

    
1381

    
1382
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1383
        featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1384

    
1385
        TextData textData = createTextData(descr, refMods, currentFeature);
1386

    
1387
        if(acceptedTaxon!=null){
1388
            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1389
            td.addElement(textData);
1390
            acceptedTaxon.addDescription(td);
1391

    
1392
            sourceHandler.addAndSaveSource(refMods, td, null);
1393
            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1394
        }
1395

    
1396
        if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1397
            try{
1398
                Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1399
                if (tmp!=null) {
1400
                    defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1401
                }else{
1402
                    importer.getTaxonService().saveOrUpdate(defaultTaxon);
1403
                }
1404
            }catch(Exception e){
1405
                logger.debug("TAXON EXISTS"+defaultTaxon);
1406
            }
1407

    
1408
            TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1409
            defaultTaxon.addDescription(td);
1410
            td.addElement(textData);
1411
            sourceHandler.addAndSaveSource(refMods, td, null);
1412
            importer.getTaxonService().saveOrUpdate(defaultTaxon);
1413
        }
1414
    }
1415

    
1416
    /**
1417
     * @param descr
1418
     * @param refMods
1419
     * @param currentFeature
1420
     * @return
1421
     */
1422
    private TextData createTextData(String descr, Reference refMods, Feature currentFeature) {
1423
        //logger.info("createTextData");
1424
        TextData textData = TextData.NewInstance();
1425
        textData.setFeature(currentFeature);
1426
        sourceHandler.addSource(refMods, textData);
1427

    
1428
        textData.putText(Language.UNKNOWN_LANGUAGE(), descr);
1429
        return textData;
1430
    }
1431

    
1432

    
1433

    
1434
    /**
1435
     * @param descr: the XML Nodegroup to parse
1436
     * @param acceptedTaxon: the current acceptedTaxon
1437
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1438
     * @param refMods: the current reference extracted from the MODS
1439
     * @param currentFeature: the feature name
1440
     * @return
1441
     */
1442
    private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon,Reference currentRef, Reference refMods, Feature currentFeature) {
1443
        //        System.out.println("setParticularDescriptionSPecial "+currentFeature);
1444
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
1445
        logger.info("setParticularDescription");
1446
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1447

    
1448
        featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1449
        TextData textData = createTextData(descr, refMods, currentFeature);
1450

    
1451
        if(! descr.isEmpty() && (acceptedTaxon!=null)){
1452
            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1453
            td.addElement(textData);
1454
            acceptedTaxon.addDescription(td);
1455

    
1456
            sourceHandler.addAndSaveSource(refMods, td, currentRef);
1457
            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1458
        }
1459

    
1460
        if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1461
            try{
1462
                Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1463
                if (tmp!=null) {
1464
                    defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1465
                }else{
1466
                    importer.getTaxonService().saveOrUpdate(defaultTaxon);
1467
                }
1468
            }catch(Exception e){
1469
                logger.debug("TAXON EXISTS"+defaultTaxon);
1470
            }
1471

    
1472
            TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1473
            defaultTaxon.addDescription(td);
1474
            td.addElement(textData);
1475
            sourceHandler.addAndSaveSource(currentRef, td,currentRef);
1476
            importer.getTaxonService().saveOrUpdate(defaultTaxon);
1477
        }
1478
    }
1479

    
1480

    
1481

    
1482
    /**
1483
     * @param synonyms: the XML Nodegroup to parse
1484
     * @param nametosave: the list of objects to save into the CDM
1485
     * @param acceptedTaxon: the current acceptedTaxon
1486
     * @param refMods: the current reference extracted from the MODS
1487
     */
1488
    @SuppressWarnings({ "rawtypes" })
1489
    private void extractSynonyms(Node synonymsNode, Taxon acceptedTaxon,Reference refMods, String followingText) {
1490
        logger.info("extractSynonyms");
1491
        //System.out.println("extractSynonyms for: "+acceptedTaxon);
1492
        Taxon ttmp = (Taxon) importer.getTaxonService().find(acceptedTaxon.getUuid());
1493
        if (ttmp != null) {
1494
            acceptedTaxon = CdmBase.deproxy(ttmp,Taxon.class);
1495
        }
1496
        else{
1497
            acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1498
        }
1499
        NodeList children = synonymsNode.getChildNodes();
1500
        List<MyName> names = new ArrayList<MyName>();
1501

    
1502
        if(synonymsNode.getNodeName().equalsIgnoreCase("tax:name")){
1503
            try {
1504
            	MyName myName = extractScientificNameSynonym(synonymsNode, refMods, followingText);
1505
                names.add(myName);
1506
            } catch (TransformerFactoryConfigurationError e) {
1507
                logger.warn(e);
1508
            } catch (TransformerException e) {
1509
                logger.warn(e);
1510
            }
1511
        }
1512

    
1513

    
1514
        for (int i=0;i<children.getLength();i++){
1515
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1516
                NodeList tmp = children.item(i).getChildNodes();
1517
                //                String fullContent = children.item(i).getTextContent();
1518
                for (int j=0; j< tmp.getLength();j++){
1519
                    if(tmp.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1520
                        try {
1521
                        	MyName myName = extractScientificNameSynonym(tmp.item(j),refMods, followingText);
1522
                            names.add(myName);
1523
                        } catch (TransformerFactoryConfigurationError e) {
1524
                            logger.warn(e);
1525
                        } catch (TransformerException e) {
1526
                            logger.warn(e);
1527
                        }
1528
                    }
1529
                }
1530
            }
1531
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
1532
                try {
1533
                	MyName myName = extractScientificNameSynonym(children.item(i),refMods, followingText);
1534
                    names.add(myName);
1535
                } catch (TransformerFactoryConfigurationError e) {
1536
                    logger.warn(e);
1537
                } catch (TransformerException e) {
1538
                    logger.warn(e);
1539
                }
1540

    
1541
            }
1542
        }
1543

    
1544
        for(MyName name:names){
1545
        	TaxonName nameToBeFilled = name.getTaxonName();
1546
            Synonym synonym = name.getSyno();
1547
            addFollowingTextToName(nameToBeFilled, followingText);
1548

    
1549
            /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1550
            nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank());
1551
            if (nameToBeFilled.hasProblem() &&
1552
                    !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1553
                //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1554
                addProblemNameToFile(name.getName(),"",nomenclaturalCode,name.getRank());
1555
                nameToBeFilled = solveNameProblem(name.getOriginalName(), name.getName(), parser,name.getAuthor(), name.getRank());
1556
            }
1557
            nameToBeFilled = getTaxonName(nameToBeFilled,nametosave,statusType);
1558
             */
1559
            if (!name.getIdentifier().isEmpty() && (name.getIdentifier().length()>2)){
1560
                setLSID(name.getIdentifier(), synonym);
1561
            }
1562

    
1563
            Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1564
            boolean synoExist = false;
1565
            for (Synonym syn: synonymsSet){
1566

    
1567
                boolean a =syn.getName().equals(synonym.getName());
1568
                boolean b = syn.getSec().equals(synonym.getSec());
1569
                if (a && b) {
1570
                    synoExist=true;
1571
                }
1572
            }
1573
            if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1574
                sourceHandler.addSource(refMods, synonym);
1575
                acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1576
            }
1577
        }
1578
        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1579
    }
1580

    
1581

    
1582
    private boolean addFollowingTextToName(TaxonName nameToBeFilled, String followingText) {
1583
    	if (nameToBeFilled != null && StringUtils.isNotBlank(followingText)){
1584
    		if (! followingText.matches("\\d\\.?")){
1585

    
1586
	    		if (followingText.startsWith(",")){
1587
	    			followingText = followingText.substring(1).trim();
1588
	    		}
1589
	    		nameToBeFilled.setFullTitleCache(nameToBeFilled.getFullTitleCache()+ "," +followingText , true);
1590
    		}
1591
    		return true;
1592
    	}
1593
    	return false;
1594

    
1595
	}
1596

    
1597
	/**
1598
     * @param refgroup: the XML nodes
1599
     * @param nametosave: the list of objects to save into the CDM
1600
     * @param acceptedTaxon: the current acceptedTaxon
1601
     * @param nametosave: the list of objects to save into the CDM
1602
     * @param refMods: the current reference extracted from the MODS
1603
     * @return the acceptedTaxon (why?)
1604
     * handle cases where the bibref are inside <p> and outside
1605
     */
1606
    @SuppressWarnings({ "rawtypes" })
1607
    private Taxon extractReferences(Node refgroup, List<TaxonName> nametosave, Taxon acceptedTaxon, Reference refMods) {
1608
        logger.info("extractReferences");
1609
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1610

    
1611
        NodeList children = refgroup.getChildNodes();
1612
        INonViralName nameToBeFilled = getNonViralNameAccNomenclature();
1613

    
1614
        ReferenceBuilder refBuild = new ReferenceBuilder(sourceHandler);
1615
        for (int i=0;i<children.getLength();i++){
1616
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
1617
                String ref = children.item(i).getTextContent().trim();
1618
                refBuild.builReference(ref, treatmentMainName, nomenclaturalCode,  acceptedTaxon, refMods);
1619
                if (!refBuild.isFoundBibref()){
1620
                    extractReferenceRawText(children.item(i).getChildNodes(), nameToBeFilled, refMods, acceptedTaxon);
1621
                }
1622
            }
1623

    
1624
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1625
                NodeList references = children.item(i).getChildNodes();
1626
                String descr="";
1627
                for (int j=0;j<references.getLength();j++){
1628
                    if(references.item(j).getNodeName().equalsIgnoreCase("tax:bibref")){
1629
                        String ref = references.item(j).getTextContent().trim();
1630
                        refBuild.builReference(ref, treatmentMainName,  nomenclaturalCode,  acceptedTaxon, refMods);
1631
                    }
1632
                    else
1633
                        if (references.item(j).getNodeName().equalsIgnoreCase("#text")
1634
                                && !references.item(j).getTextContent().trim().isEmpty()){
1635
                            descr += references.item(j).getTextContent().trim();
1636
                        }
1637

    
1638
                }
1639
                if (!refBuild.isFoundBibref()){
1640
                    //if it's not tagged, put it as row information.
1641
                    //                    extractReferenceRawText(references, nameToBeFilled, nametosave, refMods, acceptedTaxon);
1642
                    //then put it as a not markup feature if not empty
1643
                    if (!stringIsEmpty(descr.trim())){
1644
                        Feature currentFeature= getNotMarkedUpFeatureObject();
1645
                        setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1646
                    }
1647
                }
1648
            }
1649
        }
1650
        //        importer.getClassificationService().saveOrUpdate(classification);
1651
        return acceptedTaxon;
1652

    
1653
    }
1654

    
1655
    /**
1656
     * get the non viral name according to the current nomenclature
1657
     * @return
1658
     */
1659

    
1660
    private INonViralName getNonViralNameAccNomenclature() {
1661
    	return nomenclaturalCode.getNewTaxonNameInstance(null);
1662
    }
1663

    
1664
    /**
1665
     * @return the feature object for the category "not marked up"
1666
     */
1667
    private Feature getNotMarkedUpFeatureObject() {
1668
    	// FIXME use getFeature(uuid ....)
1669
        logger.info("getNotMarkedUpFeatureObject");
1670
        Feature currentFeature = (Feature)importer.getTermService().find(NotMarkedUpUUID);
1671
        if (currentFeature == null) {
1672
            currentFeature=Feature.NewInstance(notMarkedUp, notMarkedUp, notMarkedUp);
1673
            currentFeature.setUuid(NotMarkedUpUUID);
1674
            //TODO use userDefined Feature Vocabulary
1675
            Feature.DISTRIBUTION().getVocabulary().addTerm(currentFeature);
1676
//            importer.getTermService().saveOrUpdate(currentFeature);
1677
            importer.getVocabularyService().saveOrUpdate(currentFeature.getVocabulary());
1678
        }
1679
        return currentFeature;
1680
    }
1681

    
1682
    /**
1683
     * @param references
1684
     * handle cases where the bibref are inside <p> and outside
1685
     */
1686
    @SuppressWarnings("rawtypes")
1687
    private void extractReferenceRawText(NodeList references, INonViralName nameToBeFilled, Reference refMods,
1688
            Taxon acceptedTaxon) {
1689
        logger.info("extractReferenceRawText");
1690
        String refString="";
1691
        currentMyName= new MyName(true);
1692
        for (int j=0;j<references.getLength();j++){
1693
            acceptedTaxon=CdmBase.deproxy(acceptedTaxon, Taxon.class);
1694
            //no bibref tag inside
1695
            //            System.out.println("references.item(j).getNodeName()"+references.item(j).getNodeName());
1696
            if (references.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1697

    
1698
                try {
1699
                	String followingText = null;  //needs to be checked if follText is possible
1700
                	//TODO create or not create?
1701
                    currentMyName = extractScientificName(references.item(j), refMods, followingText);
1702
                } catch (TransformerFactoryConfigurationError e) {
1703
                    logger.warn(e);
1704
                } catch (TransformerException e) {
1705
                    logger.warn(e);
1706
                }
1707

    
1708
                //                name=name.trim();
1709
            }
1710
            if (references.item(j).getNodeName().equalsIgnoreCase("#text")){
1711
                refString = references.item(j).getTextContent().trim();
1712
            }
1713
            if(references.item(j).getNodeName().equalsIgnoreCase("#text") && !references.item(j).getTextContent().trim().isEmpty()){
1714
                //
1715
               if (!currentMyName.getStatus().isEmpty()){
1716
            	   String nomNovStatus = this.newNameStatus(currentMyName.getStatus());
1717
	               	if (nomNovStatus != null){
1718
	               		nameToBeFilled.setAppendedPhrase(nomNovStatus);
1719
	               	}else{
1720
	            	   try {
1721
	                        NomenclaturalStatusType  statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1722
                            nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1723
	                    } catch (UnknownCdmTypeException e) {
1724
	                        addProblematicStatusToFile(currentMyName.getStatus());
1725
	                        logger.warn("Problem with status");
1726
	                    }
1727
	               	}
1728
                }
1729

    
1730
                String fullLineRefName = references.item(j).getTextContent().trim();
1731
                int nameOrRefOrOther=2;
1732
                nameOrRefOrOther=askIfNameContained(fullLineRefName);
1733
                if (nameOrRefOrOther==0){
1734
                    TaxonName nameTBF = currentMyName.getTaxonName();
1735
                    Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1736

    
1737
                    Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1738
                    //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1739
                    boolean synoExist = false;
1740
                    for (Synonym syn: synonymsSet){
1741
                        //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1742
                        boolean a =syn.getName().equals(synonym.getName());
1743
                        boolean b = syn.getSec().equals(synonym.getSec());
1744
                        if (a && b) {
1745
                            synoExist=true;
1746
                        }
1747
                    }
1748
                    if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1749
                        sourceHandler.addSource(refMods, synonym);
1750

    
1751
                        acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1752
                    }
1753
                }
1754

    
1755
                if (nameOrRefOrOther==1){
1756
                    Reference re = ReferenceFactory.newGeneric();
1757
                    re.setTitleCache(fullLineRefName, true);
1758

    
1759
                    /* TaxonName nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
1760
                    if (nameTBF.hasProblem() &&
1761
                            !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1762
                        addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
1763
                        nameTBF=solveNameProblem(currentMyName.getName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
1764
                    }
1765
                    nameTBF = getTaxonName(nameTBF,nametosave,statusType);
1766
                     */
1767
                    TaxonName nameTBF = currentMyName.getTaxonName();
1768
                    Synonym synonym = Synonym.NewInstance(nameTBF, re);
1769

    
1770
                    Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1771
                    //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1772
                    boolean synoExist = false;
1773
                    for (Synonym syn: synonymsSet){
1774
                        //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1775
                        boolean a =syn.getName().equals(synonym.getName());
1776
                        boolean b = syn.getSec().equals(synonym.getSec());
1777
                        if (a && b) {
1778
                            synoExist=true;
1779
                        }
1780
                    }
1781
                    if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1782
                        sourceHandler.addSource(refMods, synonym);
1783

    
1784
                        acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1785
                    }
1786

    
1787
                }
1788

    
1789

    
1790
                if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1791
                    setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1792
                }
1793
            }
1794

    
1795
            if(!currentMyName.getName().isEmpty()){
1796
                //logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+currentMyName.getName()+"*");
1797
                if (acceptedTaxon.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(currentMyName.getName().trim())){
1798
                    Reference refS = ReferenceFactory.newGeneric();
1799
                    refS.setTitleCache(refString, true);
1800
                    //                            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1801
                    //                            acceptedTaxon.addDescription(td);
1802
                    //                            acceptedTaxon.addSource(refSource);
1803
                    //
1804
                    //                            TextData textData = TextData.NewInstance(Feature.CITATION());
1805
                    //
1806
                    //                            textData.addSource(null, null, refS, null);
1807
                    //                            td.addElement(textData);
1808
                    //                            td.addSource(refSource);
1809
                    //                            importer.getDescriptionService().saveOrUpdate(td);
1810

    
1811

    
1812
                    if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1813
                        setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1814

    
1815
                    }
1816

    
1817
                    acceptedTaxon.getName().setNomenclaturalReference(refS);
1818
                }else{
1819
                    TaxonName nameTBF = currentMyName.getTaxonName();
1820
                    Synonym synonym = null;
1821
                    if (! currentMyName.getStatus().isEmpty()){
1822
                    	String nomNovStatus = this.newNameStatus(currentMyName.getStatus());
1823
                    	if (nomNovStatus != null){
1824
                    		nameToBeFilled.setAppendedPhrase(nomNovStatus);
1825
                    	}else{
1826
	                    	try {
1827
	                            NomenclaturalStatusType statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1828
	                            nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1829
	                            synonym = Synonym.NewInstance(nameTBF, refMods);
1830
	                        } catch (UnknownCdmTypeException e) {
1831
	                            addProblematicStatusToFile(currentMyName.getStatus());
1832
	                            logger.warn("Problem with status");
1833
	                            synonym = Synonym.NewInstance(nameTBF, refMods);
1834
	                            synonym.setAppendedPhrase(currentMyName.getStatus());
1835
	                        }
1836
                    	}
1837
                    }else{
1838
                        synonym =  Synonym.NewInstance(nameTBF, refMods);
1839
                    }
1840

    
1841

    
1842
                    if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1843
                        setLSID(currentMyName.getIdentifier(), synonym);
1844
                    }
1845

    
1846
                    Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1847
                    //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1848
                    boolean synoExist = false;
1849
                    for (Synonym syn: synonymsSet){
1850
                        //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1851
                        boolean a =syn.getName().equals(synonym.getName());
1852
                        boolean b = syn.getSec().equals(synonym.getSec());
1853
                        if (a && b) {
1854
                            synoExist=true;
1855
                        }
1856
                    }
1857
                    if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1858
                        sourceHandler.addSource(refMods, synonym);
1859

    
1860
                        acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1861
                    }
1862
                }
1863
            }
1864
            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1865
        }
1866
    }
1867

    
1868

    
1869

    
1870
    /**
1871
     * @param identifier
1872
     * @param acceptedTaxon
1873
     */
1874
    @SuppressWarnings("rawtypes")
1875
    private void setLSID(String identifier, TaxonBase<?> taxon) {
1876
        //logger.info("setLSID");
1877
        //        boolean lsidok=false;
1878
        String id = identifier.split("__")[0];
1879
        String source = identifier.split("__")[1];
1880
        if (id.indexOf("lsid")>-1){
1881
            try {
1882
                LSID lsid = new LSID(id);
1883
                taxon.setLsid(lsid);
1884
                //                lsidok=true;
1885
            } catch (MalformedLSIDException e) {
1886
                logger.warn("Malformed LSID");
1887
            }
1888

    
1889
        }
1890

    
1891
        //logger.info("search reference for LSID");
1892
        //  if ((id.indexOf("lsid")<0) || !lsidok){
1893
        //ADD ORIGINAL SOURCE ID EVEN IF LSID
1894
        Reference re = null;
1895
        Pager<Reference> references = importer.getReferenceService().findByTitleWithRestrictions(Reference.class, source, MatchMode.EXACT, null, 1, null, null, null);
1896
        if( references !=null && references.getCount()>0){
1897
            re=references.getRecords().get(0);
1898
        }
1899
        //logger.info("search reference for LSID-end");
1900
        if(re == null){
1901
            re = ReferenceFactory.newGeneric();
1902
            re.setTitleCache(source, true);
1903
            importer.getReferenceService().saveOrUpdate(re);
1904
        }
1905
        re=CdmBase.deproxy(re, Reference.class);
1906

    
1907
        //logger.info("search source for LSID");
1908
        Set<IdentifiableSource> sources = taxon.getSources();
1909
        boolean lsidinsource=false;
1910
        boolean urlinsource=false;
1911
        for (IdentifiableSource src:sources){
1912
            if (id.equalsIgnoreCase(src.getIdInSource()) && re.getTitleCache().equals(src.getCitation().getTitleCache())) {
1913
                lsidinsource=true;
1914
            }
1915
            if (src.getIdInSource() == null && re.getTitleCache().equals(sourceUrlRef.getTitleCache())) {
1916
                urlinsource=true;
1917
            }
1918
        }
1919
        if(!lsidinsource) {
1920
            taxon.addSource(OriginalSourceType.Import, id,null,re,null);
1921
        }
1922
        if(!urlinsource)
1923
        {
1924
            sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
1925
            taxon.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
1926
            // }
1927
        }
1928

    
1929
    }
1930

    
1931
    /**
1932
     * try to solve a parsing problem for a scientific name
1933
     * @param original : the name from the OCR document
1934
     * @param name : the tagged version
1935
     * @param parser
1936
     * @return the corrected TaxonName
1937
     */
1938
    /*   @SuppressWarnings({ "unchecked", "rawtypes" })
1939
    private TaxonName solveNameProblem(String original, String name, INonViralNameParser parser, String author, Rank rank) {
1940
        Map<String,String> ato = namesMap.get(original);
1941
        if (ato == null) {
1942
            ato = namesMap.get(original+" "+author);
1943
        }
1944

    
1945

    
1946
        if (ato == null && rank.equals(Rank.UNKNOWN_RANK())){
1947
            rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
1948
        }
1949
        if (ato != null && rank.equals(Rank.UNKNOWN_RANK())){
1950
            rank = getRank(ato);
1951
        }
1952
        //        TaxonName nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1953
        TaxonName nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1954
        //                logger.info("RANK: "+rank);
1955
        int retry=0;
1956
        List<ParserProblem> problems = nameTBF.getParsingProblems();
1957
        for (ParserProblem pb:problems) {
1958
            System.out.println(pb.toString());
1959
        }
1960
        while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
1961
            addProblemNameToFile(name,author,nomenclaturalCode,rank);
1962
            String fullname=name;
1963
            if(! skippQuestion) {
1964
                fullname =  getFullReference(name,nameTBF.getParsingProblems());
1965
            }
1966
            if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1967
                nameTBF = TaxonNameFactory.NewBotanicalInstance(null);
1968
            }
1969
            if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1970
                nameTBF = TaxonNameFactory.NewZoologicalInstance(null);
1971
            }
1972
            if (nomenclaturalCode.equals(NomenclaturalCode.ICNP)){
1973
                nameTBF= TaxonNameFactory.NewBacterialInstance(null);
1974
            }
1975
            parser.parseReferencedName(nameTBF, fullname, rank, false);
1976
            retry++;
1977
        }
1978
        if (retry == 1){
1979
            if(author != null){
1980
                if (name.indexOf(author)>-1) {
1981
                    nameTBF = parser.parseSimpleName(name.substring(0, name.indexOf(author)), nomenclaturalCode, rank);
1982
                } else {
1983
                    nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1984
                }
1985
                if (nameTBF.hasProblem()){
1986
                    if (name.indexOf(author)>-1) {
1987
                        addProblemNameToFile(name.substring(0, name.indexOf(author)),author,nomenclaturalCode,rank);
1988
                    } else {
1989
                        addProblemNameToFile(name,author,nomenclaturalCode,rank);
1990
                    }
1991
                    //                    System.out.println("TBF still has problems "+nameTBF.hasProblem());
1992
                    problems = nameTBF.getParsingProblems();
1993
                    for (ParserProblem pb:problems) {
1994
                        System.out.println(pb.toString());
1995
                    }
1996
                    nameTBF.setFullTitleCache(name, true);
1997
                }else{
1998
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
1999
                        ((BotanicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2000
                    }
2001
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2002
                        ((ZoologicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2003
                    }
2004
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICNP)) {
2005
                        ((BacterialName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2006
                    }
2007
                }
2008
                //                    logger.info("FULL TITLE CACHE "+name);
2009
            }else{
2010
                nameTBF.setFullTitleCache(name, true);
2011
            }
2012
        }
2013
        return nameTBF;
2014
    }
2015

    
2016
     */
2017

    
2018
    /**
2019
     * @param nomenclatureNode: the XML nodes
2020
     * @param nametosave: the list of objects to save into the CDM
2021
     * @param refMods: the current reference extracted from the MODS
2022
     * @return
2023
     */
2024
    @SuppressWarnings({ "rawtypes" })
2025
    private Taxon extractNomenclature(Node nomenclatureNode,  List<TaxonName> nametosave, Reference refMods) throws ClassCastException{
2026
        refMods=CdmBase.deproxy(refMods, Reference.class);
2027

    
2028
        logger.info("extractNomenclature");
2029
        NodeList children = nomenclatureNode.getChildNodes();
2030
        String freetext="";
2031
        Taxon acceptedTaxon = null;
2032
        //   INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2033

    
2034
        //        String fullContent = nomenclatureNode.getTextContent();
2035

    
2036
        NomenclaturalStatusType statusType = null;
2037
        String newNameStatus = null;
2038
        //TODO
2039
        for (int i=0;i<children.getLength();i++){
2040
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:status")){
2041
                String status = children.item(i).getTextContent().trim();
2042

    
2043
                if (!status.isEmpty()){
2044
                	if (newNameStatus(status) != null){
2045
                		newNameStatus = newNameStatus(status);
2046
                    }else{
2047
	                    try {
2048
	                        statusType = nomStatusString2NomStatus(status);
2049
	                    } catch (UnknownCdmTypeException e) {
2050
	//                    	nomNovStatus;
2051
	                    	addProblematicStatusToFile(status);
2052
	                        logger.warn("Problem with status: " + status);
2053
	                    }
2054
                    }
2055
                }
2056
            }
2057
        }
2058

    
2059
        boolean containsSynonyms=false;
2060
        boolean wasSynonym = false;
2061
        usedFollowingTextPrefix = null;  //reset
2062

    
2063
        for (int i=0; i<children.getLength(); i++){
2064
        	Node childNode = children.item(i);
2065
        	String childName = childNode.getNodeName();
2066

    
2067

    
2068
        	//following text
2069
        	followingText = null;
2070
        	if ( i + 1 < children.getLength()){
2071
            	Node followingTextNode = children.item(i +1);
2072
            	if (followingTextNode.getNodeName().equals("#text") && !followingTextNode.getTextContent().matches("\\s*") ){
2073
            		followingText = followingTextNode.getTextContent();
2074
            	}
2075
        	}
2076

    
2077
        	//traverse nodes
2078
            if (childName.equalsIgnoreCase("#text")) {
2079
                freetext = childNode.getTextContent().trim();
2080
                if (usedFollowingTextPrefix != null && freetext.startsWith(usedFollowingTextPrefix)){
2081
                	freetext = freetext.substring(usedFollowingTextPrefix.length());
2082
                }
2083
                usedFollowingTextPrefix = null;  //reset
2084
            }else if (childName.equalsIgnoreCase("tax:collection_event")) {
2085
                //                System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
2086
                extractMaterialsDirect(childNode, acceptedTaxon, refMods, "collection", currentMyName.getTaxonName());
2087
            }else if(childName.equalsIgnoreCase("tax:name")){
2088
                INonViralName nameToBeFilled;
2089
                //System.out.println("HANDLE FIRST NAME OF THE LIST");
2090
                if(!containsSynonyms){
2091
                	wasSynonym = false;
2092

    
2093
                	//System.out.println("I : "+i);
2094
                    currentMyName = new MyName(false);
2095
                    try {
2096
                        currentMyName = extractScientificName(childNode, refMods, followingText);
2097
                        treatmentMainName = currentMyName.getNewName();
2098
                        originalTreatmentName = currentMyName.getOriginalName();
2099

    
2100
                    } catch (TransformerFactoryConfigurationError e1) {
2101
                        throw new RuntimeException(e1);
2102
                    } catch (TransformerException e1) {
2103
                    	throw new RuntimeException(e1);
2104
                    }
2105

    
2106
                    if (currentMyName.getRank().equals(Rank.UNKNOWN_RANK()) || currentMyName.getRank().isLower(state2.getConfig().getMaxRank()) || currentMyName.getRank().equals(state2.getConfig().getMaxRank())){
2107
                        maxRankRespected=true;
2108

    
2109
                        nameToBeFilled=currentMyName.getTaxonName();
2110

    
2111
                        //   acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
2112
                        acceptedTaxon=currentMyName.getTaxon();
2113
                        //System.out.println("TreatmentName "+treatmentMainName+" - "+acceptedTaxon);
2114

    
2115

    
2116
                        boolean statusMatch=false;
2117
                        if(acceptedTaxon !=null ){
2118
                            acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2119
                            statusMatch=compareStatus(acceptedTaxon, statusType);
2120
                            //System.out.println("statusMatch: "+statusMatch);
2121
                        }
2122
                        if (acceptedTaxon ==null || (acceptedTaxon != null && !statusMatch)){
2123

    
2124
                            nameToBeFilled=currentMyName.getTaxonName();
2125
                            if (nameToBeFilled != null){
2126
                                if (!originalTreatmentName.isEmpty()) {
2127
                                    TaxonNameDescription td = TaxonNameDescription.NewInstance();
2128
                                    td.setTitleCache(originalTreatmentName, true);
2129
                                    nameToBeFilled.addDescription(td);
2130
                                }
2131

    
2132
                                if(statusType != null) {
2133
                                    nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
2134
                                }
2135
                                if(newNameStatus != null){
2136
                                	nameToBeFilled.setAppendedPhrase(newNameStatus);
2137
                                }
2138
                                sourceHandler.addSource(refMods, TaxonName.castAndDeproxy(nameToBeFilled));
2139

    
2140
                                if (nameToBeFilled.getNomenclaturalReference() == null) {
2141
                                    acceptedTaxon= Taxon.NewInstance(nameToBeFilled,refMods);
2142
                                    //System.out.println("NEW ACCEPTED HERE "+nameToBeFilled);
2143
                                }
2144
                                else {
2145
                                    acceptedTaxon= Taxon.NewInstance(nameToBeFilled,nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
2146
                                    //System.out.println("NEW ACCEPTED HERE2 "+nameToBeFilled);
2147
                                }
2148

    
2149
                                sourceHandler.addSource(refMods, acceptedTaxon);
2150

    
2151
                                if(!state2.getConfig().doKeepOriginalSecundum()) {
2152
                                    acceptedTaxon.setSec(state2.getConfig().getSecundum());
2153
                                    //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2154
                                    //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2155
                                }
2156

    
2157
                                if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2158
                                    setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2159
                                }
2160

    
2161

    
2162
                                importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2163
                                acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2164
                            }
2165

    
2166
                        }else{
2167
                            acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2168
                            Set<IdentifiableSource> sources = acceptedTaxon.getSources();
2169
                            boolean sourcelinked=false;
2170
                            for (IdentifiableSource source:sources){
2171
                                if (source.getCitation().getTitleCache().equalsIgnoreCase(refMods.getTitleCache())) {
2172
                                    sourcelinked=true;
2173
                                }
2174
                            }
2175
                            if (!state2.getConfig().doKeepOriginalSecundum()) {
2176
                                acceptedTaxon.setSec(state2.getConfig().getSecundum());
2177
                                //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2178
                                //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2179
                            }
2180
                            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2181

    
2182
                            if (!sourcelinked){
2183
                                sourceHandler.addSource(refMods, acceptedTaxon);
2184
                            }
2185
                            if (!sourcelinked || !state2.getConfig().doKeepOriginalSecundum()){
2186

    
2187
                                if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2188
                                    //FIXME are these identifiers really related to taxa, not names? Exiting LSIDs come from Zoobank, urn:lsid:biosci.ohio-state.edu:osuc_concepts:134826 (Ants)
2189
                                	setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2190
                                }
2191
                                importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2192
                            }
2193
                        }
2194
                    }else{
2195
                        maxRankRespected=false;
2196
                    }
2197
                    containsSynonyms=true;  //all folowing names are handled as synonyms
2198
                }else{
2199
                    try{
2200
                        extractSynonyms(childNode, acceptedTaxon, refMods, followingText);
2201
                        wasSynonym = true;
2202

    
2203
                    }catch(NullPointerException e){
2204
                        logger.warn("null pointer exception, the accepted taxon might be null");
2205
                    }
2206
                }
2207
                containsSynonyms=true;
2208
            }else if (childName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
2209
                reloadClassification();
2210
                //extract the References within the document
2211
                extractReferences(childNode,nametosave,acceptedTaxon,refMods);
2212
            }else if (childName.equalsIgnoreCase("tax:bibref")){
2213
            	logger.warn(childName + " still preliminary");
2214

    
2215
            	TaxonName currentName = currentMyName == null ? null : currentMyName.getTaxonName();
2216
            	boolean handled = addFollowingTextToName (currentName, childNode.getTextContent() );
2217
            	if (! handled){
2218
            		setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2219
            	}
2220
            }else{
2221
            	logger.warn(childName + " not yet handled");
2222
            }
2223
            if(!stringIsEmpty(freetext.trim())) {;
2224
                if (! freetext.matches("\\d\\.?")){
2225
                    TaxonName currentName = currentMyName == null ? null : currentMyName.getTaxonName();
2226
                	boolean handled = false;
2227
                	if (currentName != null && !wasSynonym){
2228
                		handled = addFollowingTextToName (currentName, childNode.getTextContent() );
2229
                	}
2230
                	if (! handled){
2231
                		setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2232
                	}
2233
                }
2234

    
2235
                 freetext = "";
2236
            }
2237

    
2238
        }
2239
        //importer.getClassificationService().saveOrUpdate(classification);
2240
        return acceptedTaxon;
2241
    }
2242

    
2243

    
2244

    
2245

    
2246
	/**
2247
     * @return
2248
     */
2249

    
2250
    private boolean compareStatus(TaxonBase<?> t, NomenclaturalStatusType statusType) {
2251
        //logger.info("compareStatus");
2252
        boolean statusMatch=false;
2253
        //found one taxon
2254
        Set<NomenclaturalStatus> status = t.getName().getStatus();
2255
        if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
2256
            for (NomenclaturalStatus st:status){
2257
                NomenclaturalStatusType stype = st.getType();
2258
                if (stype.toString().equalsIgnoreCase(statusType.toString())) {
2259
                    statusMatch=true;
2260
                }
2261
            }
2262
        }
2263
        else{
2264
            if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
2265
                statusMatch=true;
2266
            }
2267
        }
2268
        return statusMatch;
2269
    }
2270

    
2271
    /**
2272
     * @param acceptedTaxon: the current acceptedTaxon
2273
     * @param ref: the current reference extracted from the MODS
2274
     * @return the parent for the current accepted taxon
2275
     */
2276
    /*  private Taxon createParent(Taxon acceptedTaxon, Reference ref) {
2277
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2278

    
2279
        List<Rank> rankList = new ArrayList<Rank>();
2280
        rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
2281

    
2282
        List<String> rankListStr = new ArrayList<String>();
2283
        for (Rank r:rankList) {
2284
            rankListStr.add(r.toString());
2285
        }
2286
        String r="";
2287
        String s = acceptedTaxon.getTitleCache();
2288
        Taxon tax = null;
2289
        if(!skippQuestion){
2290
            int addTaxon = askAddParent(s);
2291
            logger.info("ADD TAXON: "+addTaxon);
2292
            if (addTaxon == 0 ){
2293
                Taxon tmp = askParent(acceptedTaxon, classification);
2294
                if (tmp == null){
2295
                    s = askSetParent(s);
2296
                    r = askRank(s,rankListStr);
2297

    
2298
                    TaxonName nameToBeFilled = null;
2299
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2300
                        nameToBeFilled = TaxonNameFactory.NewBotanicalInstance(null);
2301
                    }
2302
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2303
                        nameToBeFilled = TaxonNameFactory.NewZoologicalInstance(null);
2304
                    }
2305
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICNP)){
2306
                        nameToBeFilled = TaxonNameFactory.NewBacterialInstance(null);
2307
                    }
2308
                    nameToBeFilled.setTitleCache(s, true);
2309
                    nameToBeFilled.setRank(getRank(r), true);
2310

    
2311
                    tax = Taxon.NewInstance(nameToBeFilled, ref);
2312
                }
2313
                else{
2314
                    tax=tmp;
2315
                }
2316

    
2317
                createParent(tax, ref);
2318
                //            logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
2319
                classification.addParentChild(tax, acceptedTaxon, ref, null);
2320
            }
2321
            else{
2322
                classification.addChildTaxon(acceptedTaxon, ref, null);
2323
                tax=acceptedTaxon;
2324
            }
2325
        } else{
2326
            classification.addChildTaxon(acceptedTaxon, ref, null);
2327
            tax=acceptedTaxon;
2328
        }
2329
        //        logger.info("RETURN: "+tax );
2330
        return tax;
2331

    
2332
    }
2333

    
2334
     */
2335

    
2336

    
2337
    private MyName  extractScientificNameSynonym(Node name, Reference refMods, String followingText) throws TransformerFactoryConfigurationError, TransformerException {
2338
        //System.out.println("extractScientificNameSynonym");
2339
        logger.info("extractScientificNameSynonym");
2340
        String[] rankListToPrint_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2341
        List<String> rankListToPrint = new ArrayList<String>();
2342
        for (String r : rankListToPrint_tmp) {
2343
            rankListToPrint.add(r.toLowerCase());
2344
        }
2345

    
2346
        Rank rank = Rank.UNKNOWN_RANK();
2347
        NodeList children = name.getChildNodes();
2348
        String originalName="";
2349
        String fullName = "";
2350
        String newName="";
2351
        String identifier="";
2352
        HashMap<String, String> atomisedMap = new HashMap<>();
2353
        List<String> atomisedName= new ArrayList<String>();
2354

    
2355
        String rankStr = "";
2356
        Rank tmpRank ;
2357

    
2358
        String status= extractStatus(children);
2359

    
2360
        for (int i=0;i<children.getLength();i++){
2361
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:xmldata")){
2362
                NodeList atom = children.item(i).getChildNodes();
2363
                for (int k=0;k<atom.getLength();k++){
2364
                    identifier = extractIdentifier(identifier, atom.item(k));
2365
                    tmpRank = null;
2366
                    rankStr = atom.item(k).getNodeName().toLowerCase();
2367
                    //                    logger.info("RANKSTR:*"+rankStr+"*");
2368
                    if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2369
                        rankStr=atom.item(k).getTextContent().trim();
2370
                        tmpRank = getRank(rankStr);
2371
                    }
2372
                    //                    if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2373
                    if (tmpRank != null){
2374
                        rank=tmpRank;
2375
                    }
2376
                    atomisedMap.put(rankStr.toLowerCase(),atom.item(k).getTextContent().trim());
2377
                }
2378
                addAtomisedNamesToMap(rankListToPrint, rank, atomisedName, atom);
2379
            }
2380
            if(children.item(i).getNodeName().equalsIgnoreCase("#text") && !StringUtils.isBlank(children.item(i).getTextContent())){
2381
                //                logger.info("name non atomised: "+children.item(i).getTextContent());
2382
                fullName = children.item(i).getTextContent().trim();
2383
                //                logger.info("fullname: "+fullName);
2384
            }
2385
        }
2386
        originalName=fullName;
2387
        fullName = cleanName(fullName, atomisedName);
2388
        namesMap.put(fullName,atomisedMap);
2389

    
2390
        String atomisedNameStr = getAtomisedNameStr(atomisedName);
2391

    
2392
        if (fullName != null){
2393
            //            System.out.println("fullname: "+fullName);
2394
            //            System.out.println("atomised: "+atomisedNameStr);
2395
            if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2396
                if (skippQuestion){
2397
                    //                    String defaultN = "";
2398
                    if (atomisedNameStr.length()>fullName.length()) {
2399
                        newName=atomisedNameStr;
2400
                    } else {
2401
                        if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2402
                            newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2403
                        } else {
2404
                            newName=fullName;
2405
                        }
2406
                    }
2407
                } else {
2408
                    newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2409
                }
2410
            } else {
2411
                newName=fullName;
2412
            }
2413
        }
2414
        //not really needed
2415
        //        rank = askForRank(newName, rank, nomenclaturalCode);
2416
        //        System.out.println("atomised: "+atomisedMap.toString());
2417

    
2418
        //        String[] names = new String[5];
2419
        MyName myname = new MyName(true);
2420

    
2421
        //System.out.println("Handle "+newName+ "(rank: "+rank+")");
2422
        //        System.out.println(atomisedMap.keySet());
2423
        fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2424
        myname.setOriginalName(fullName);
2425
        myname.setNewName(newName);
2426
        myname.setRank(rank);
2427
        myname.setIdentifier(identifier);
2428
        myname.setStatus(status);
2429
        myname.setSource(refMods);
2430

    
2431
        //        boolean higherAdded=false;
2432

    
2433

    
2434
        boolean parseNameManually=false;
2435
        INonViralNameParser<?> parser = NonViralNameParserImpl.NewInstance();
2436
        TaxonName nameToBeFilledTest ;
2437

    
2438
        //if selected the atomised version
2439
        if(newName==atomisedNameStr){
2440
            nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2441
            if (nameToBeFilledTest.hasProblem()){
2442
                addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2443
                nameToBeFilledTest = (TaxonName)parser.parseFullName(fullName, nomenclaturalCode, rank);
2444
                if (nameToBeFilledTest.hasProblem()){
2445
                    addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2446
                    parseNameManually=true;
2447
                }
2448
            }
2449
        }else{
2450
            nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2451
            if (nameToBeFilledTest.hasProblem()){
2452
                addProblemNameToFile("fullversion",fullName, nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2453
                nameToBeFilledTest = (TaxonName)parser.parseFullName(fullName, nomenclaturalCode,rank);
2454
                parseNameManually=true;
2455
                if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2456
                    addNameDifferenceToFile(originalName,atomisedNameStr);
2457
                }
2458
            }
2459
        }
2460

    
2461
        if(parseNameManually){
2462
            //System.out.println("DO IT MANUALLY");
2463
        	if (this.state2.getConfig().isUseOldUnparsedSynonymExtraction()){
2464
                createUnparsedSynonym(rank, newName, atomisedMap, myname);
2465
        	}else{
2466
        		createUnparsedSynonymNew(rank, newName, atomisedMap, myname, refMods);;
2467
        	}
2468
        } else{
2469
            //System.out.println("AUTOMATIC!");
2470
            //            createAtomisedTaxonString(newName, atomisedMap, myname);
2471
            myname.setParsedName(nameToBeFilledTest);
2472
            myname.buildTaxon();
2473
        }
2474
        //System.out.println("RETURN SYNONYM "+myname.getSyno().toString());
2475
        return myname;
2476
    }
2477

    
2478

    
2479
	/**
2480
     * @param name
2481
     * @throws TransformerFactoryConfigurationError
2482
     * @throws TransformerException
2483
     * @return a list of possible names
2484
     */
2485
    @SuppressWarnings({"rawtypes" })
2486
    private MyName extractScientificName(Node name, Reference refMods, String followingText) throws TransformerFactoryConfigurationError, TransformerException {
2487
        logger.info("extractScientificName");
2488

    
2489
        String[] rankListToPrintLowerCase_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2490
        List<String> rankListToPrint = Arrays.asList(rankListToPrintLowerCase_tmp);
2491

    
2492
        Rank rank = Rank.UNKNOWN_RANK();
2493
        NodeList children = name.getChildNodes();
2494
        String originalName = "";
2495
        String fullName = "";
2496
        String newName = "";
2497
        String identifier = "";
2498
        HashMap<String, String> atomisedMap = new HashMap<>();
2499
        List<String> atomisedNameList= new ArrayList<>();
2500

    
2501
        String status= extractStatus(children);
2502

    
2503
        for (int i=0;i<children.getLength();i++){
2504
        	Node nameChild = children.item(i);
2505
            if(nameChild.getNodeName().equalsIgnoreCase("tax:xmldata")){
2506
                NodeList xmlDataChildren = nameChild.getChildNodes();
2507
                for (int k=0;k<xmlDataChildren.getLength();k++){
2508
                	Node xmlDataChild = xmlDataChildren.item(k);
2509
                    identifier = extractIdentifier(identifier, xmlDataChild);
2510
                    String rankStr = xmlDataChild.getNodeName().toLowerCase();
2511
                    if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2512
                        rankStr=xmlDataChild.getTextContent().trim();
2513
                        Rank tmpRank = getRank(rankStr);
2514
                        if (tmpRank != null){
2515
                            rank=tmpRank;
2516
                        }
2517
                    }
2518
                    //                    if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2519

    
2520
                    atomisedMap.put(rankStr.toLowerCase(),xmlDataChild.getTextContent().trim());
2521
                }
2522
                addAtomisedNamesToMap(rankListToPrint, rank, atomisedNameList, xmlDataChildren);
2523
            }
2524
            else if(nameChild.getNodeName().equalsIgnoreCase("#text") && ! nameChild.getTextContent().matches("\\s*")){
2525
                //                logger.info("name non atomised: "+children.item(i).getTextContent());
2526
                fullName = nameChild.getTextContent().trim();
2527
                //                logger.info("fullname: "+fullName);
2528
            }
2529
        }
2530
        originalName=fullName;
2531
        fullName = cleanName(fullName, atomisedNameList);
2532
        namesMap.put(fullName,atomisedMap);
2533

    
2534
        String atomisedNameStr = getAtomisedNameStr(atomisedNameList);
2535

    
2536
        if (fullName != null){
2537
            if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2538
                if (skippQuestion){
2539
                    if (atomisedNameStr.length()>fullName.length()) {
2540
                        newName = atomisedNameStr;
2541
                    } else {
2542
                        if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2543
                            newName = askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
2544
                        } else {
2545
                            newName = fullName;
2546
                        }
2547
                    }
2548
                } else {
2549
                    newName=askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
2550
                }
2551
            } else {
2552
                newName=fullName;
2553
            }
2554
        }
2555
        //not really needed
2556
        //        rank = askForRank(newName, rank, nomenclaturalCode);
2557
        //        System.out.println("atomised: "+atomisedMap.toString());
2558

    
2559
        //        String[] names = new String[5];
2560
        MyName myname = new MyName(false);
2561

    
2562
        //System.out.println("\n\nBUILD "+newName+ "(rank: "+rank+")");
2563
        //        System.out.println(atomisedMap.keySet());
2564
        fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2565
        myname.setOriginalName(fullName);
2566
        myname.setNewName(newName);
2567

    
2568
        myname.setRank(rank);
2569
        myname.setIdentifier(identifier);
2570
        myname.setStatus(status);
2571
        myname.setSource(refMods);
2572

    
2573
        //        boolean higherAdded=false;
2574

    
2575

    
2576
        boolean parseNameManually=false;
2577
        INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2578
        TaxonName  nameToBeFilledTest = null;
2579

    
2580
        //if selected the atomised version
2581
        if(newName==atomisedNameStr){
2582
            nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2583
            if (nameToBeFilledTest.hasProblem()){
2584
        	    addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2585
                nameToBeFilledTest = (TaxonName)parser.parseFullName(fullName, nomenclaturalCode,rank);
2586
                if (nameToBeFilledTest.hasProblem()){
2587
                    addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2588
                    parseNameManually=true;
2589
                }
2590
            }
2591
        }else{
2592
            nameToBeFilledTest = parseWithExtension(parser, fullName , rank, followingText, atomisedMap);
2593
            if (nameToBeFilledTest.hasProblem()){
2594
                addProblemNameToFile("fullversion",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2595
                nameToBeFilledTest = (TaxonName)parser.parseFullName(fullName, nomenclaturalCode,rank);
2596
                parseNameManually=true;
2597
                if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2598
                    addNameDifferenceToFile(originalName,atomisedNameStr);
2599
                }
2600
            }
2601
        }
2602

    
2603
        //System.out.println("parseNameManually: "+parseNameManually);
2604
        if(parseNameManually){
2605
            createAtomisedTaxon(rank, newName, atomisedMap, myname);
2606
        }
2607
        else{
2608
            createAtomisedTaxonString(newName, atomisedMap, myname);
2609
            myname.setParsedName(nameToBeFilledTest);
2610
            //TODO correct handling of createIfNotExists
2611
           	myname.buildTaxon();
2612
        }
2613
        return myname;
2614

    
2615
    }
2616

    
2617
    private TaxonName parseWithExtension(INonViralNameParser parser, String atomisedNameStr, Rank rank, String followingText, HashMap<String, String> atomisedMap) {
2618
    	Object[] nameExtensionResult = getPossibleExtension(followingText, atomisedMap, nomenclaturalCode);
2619

    
2620
    	TaxonName name = (TaxonName)parser.parseFullName(atomisedNameStr, nomenclaturalCode, rank);
2621
    	if (nameExtensionResult != null && nameExtensionResult[0] != null){
2622
    		String ext = (String)nameExtensionResult[0];
2623
    		TaxonName extName = (TaxonName)parser.parseFullName(atomisedNameStr + " " + ext, nomenclaturalCode, rank);
2624
    		if (! extName.hasProblem()){
2625
    			name = extName;
2626
    			this.usedFollowingTextPrefix = ext;
2627
    			//TODO do we need to fill the atomisedMap at all?
2628
    			if ((Boolean)(nameExtensionResult[1])){
2629
    				//TODO
2630
    			}
2631
    			if ((Boolean)(nameExtensionResult[2])){
2632
    				//TODO BasionymYear etc.
2633
    				Integer origYear = name.getPublicationYear();
2634
    				if (origYear != null){
2635
        				atomisedMap.put(PUBLICATION_YEAR, origYear.toString());
2636
    				}
2637
    			}
2638
    		}
2639
    	}
2640
		return name;
2641
	}
2642

    
2643
	private Object[] getPossibleExtension(String followingText, HashMap<String, String> atomisedMap, NomenclaturalCode nomenclaturalCode) {
2644
		if (StringUtils.isBlank(followingText)){
2645
			return null;
2646
		}
2647

    
2648
    	boolean includeAuthor = true;
2649
    	boolean includeYear = false;
2650
		if (atomisedMap.containsKey("dwc:scientificnameauthorship")){
2651
			includeAuthor = false;
2652
		}
2653
    	if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2654
    		includeYear = true;
2655
    	}
2656
    	String patternStr = "";
2657
    	if (includeAuthor){
2658
    		patternStr += NonViralNameParserImplRegExBase.capitalWord;
2659
    	}
2660
    	if (includeYear){
2661
    		patternStr += "\\s*(,|\\s+)\\s*" + "(17|18|19|20)" + "\\d{2}" ;
2662
    	}
2663
    	String match = null;
2664
    	if (! patternStr.isEmpty()){
2665
    		Pattern pattern = Pattern.compile("^" + patternStr);
2666
    		Matcher matcher = pattern.matcher(followingText.trim());
2667
    		if (matcher.find()){
2668
    			match = matcher.group();
2669
    		}
2670
    	}
2671

    
2672
		return new Object[]{match, includeAuthor, includeYear};
2673
	}
2674

    
2675
	/**
2676
     * @param atomisedName
2677
     * @return
2678
     */
2679
    private String getAtomisedNameStr(List<String> atomisedName) {
2680
        //logger.info("getAtomisedNameStr");
2681
        String atomisedNameStr = StringUtils.join(atomisedName," ");
2682
        while(atomisedNameStr.contains("  ")) {
2683
            atomisedNameStr=atomisedNameStr.replace("  ", " ");
2684
        }
2685
        atomisedNameStr=atomisedNameStr.trim();
2686
        return atomisedNameStr;
2687
    }
2688

    
2689
    /**
2690
     * @param children
2691
     * @param status
2692
     * @return
2693
     */
2694
    private String extractStatus(NodeList children) {
2695
        logger.info("extractStatus");
2696
        String status="";
2697
        for (int i=0;i<children.getLength();i++){
2698
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:status") ||
2699
                    (children.item(i).getNodeName().equalsIgnoreCase("tax:namePart") &&
2700
                            children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("status"))){
2701
                status = children.item(i).getTextContent().trim();
2702
            }
2703
        }
2704
        return status;
2705
    }
2706

    
2707
    /**
2708
     * @param identifier
2709
     * @param atom
2710
     * @param k
2711
     * @return
2712
     */
2713
    private String extractIdentifier(String identifier, Node atom) {
2714
        //logger.info("extractIdentifier");
2715
        if (atom.getNodeName().equalsIgnoreCase("tax:xid")){
2716
            try{
2717
                identifier = atom.getAttributes().getNamedItem("identifier").getNodeValue();
2718
            }catch(Exception e){
2719
                System.out.println("pb with identifier, maybe empty");
2720
            }
2721
            try{
2722
                identifier+="__"+atom.getAttributes().getNamedItem("source").getNodeValue();
2723
            }catch(Exception e){
2724
                System.out.println("pb with identifier, maybe empty");
2725
            }
2726
        }
2727
        return identifier;
2728
    }
2729

    
2730
    /**
2731
     * @param rankListToPrint
2732
     * @param rank
2733
     * @param atomisedName
2734
     * @param atom
2735
     */
2736
    private void addAtomisedNamesToMap(List<String> rankListToPrint, Rank rank, List<String> atomisedName, NodeList atom) {
2737
        logger.info("addAtomisedNamesToMap");
2738
        for (int k=0;k<atom.getLength();k++){
2739
        	Node node = atom.item(k);
2740
        	String nodeName = node.getNodeName();
2741
            if (! nodeName.equalsIgnoreCase("dwc:taxonRank") ) {  //rank has been handled in higher method
2742
                if (nodeName.equalsIgnoreCase("dwc:subgenus") || nodeName.equalsIgnoreCase("dwcranks:subgenus")) {
2743
                    atomisedName.add("("+ node.getTextContent().trim()+")");
2744
                } else if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet") || nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2745
                       	if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet")){
2746
                            atomisedName.add("var. "+node.getTextContent().trim());
2747
                        }else if(nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2748
                            atomisedName.add("subsp. "+atom.item(k).getTextContent().trim());
2749
                        }
2750
                } else if(rankListToPrint.contains(nodeName.toLowerCase())) {
2751
                    atomisedName.add(node.getTextContent().trim());
2752
                } else{
2753
                    if (rank.isHigher(Rank.GENUS()) && (nodeName.indexOf("dwcranks:")>-1 || nodeName.indexOf("dwc:Family")>-1)) {
2754
                        atomisedName.add(node.getTextContent().trim());
2755
                    }else if (nodeName.equals("#text")){
2756
                    	String text = node.getTextContent();
2757
                    	if (StringUtils.isNotBlank(text)){
2758
                    		//TODO handle text
2759
                    		logger.warn("name xmldata contains text. This is unhandled");
2760
                    	}
2761
                    }else if (nodeName.matches("(?i)(dwc:Kingdom|dwc:Class|dwc:Order|dwc:Family)")){
2762
                    	//we currently do not use higher ranks information
2763
                    }else{
2764
                    	//TODO handle unhandled node
2765
                    	logger.warn("Unhandled node: " + nodeName);
2766
                    }
2767
                }
2768
            }
2769
        }
2770
    }
2771

    
2772
    /**
2773
     * @param fullName
2774
     * @param atomisedName
2775
     * @return
2776
     */
2777
    private String cleanName(String name, List<String> atomisedName) {
2778
        //logger.info("cleanName");
2779
        String fullName =name;
2780
        if (fullName != null){
2781
            fullName = fullName.replace("( ", "(");
2782
            fullName = fullName.replace(" )",")");
2783

    
2784
            if (fullName.trim().isEmpty()){
2785
                fullName=StringUtils.join(atomisedName," ");
2786
            }
2787

    
2788
            while(fullName.contains("  ")) {
2789
                fullName=fullName.replace("  ", " ");
2790
                //            logger.info("while");
2791
            }
2792
            fullName=fullName.trim();
2793
        }
2794
        return fullName;
2795
    }
2796

    
2797
    /**
2798
     * @param rank
2799
     * @param fullName
2800
     * @param atomisedMap
2801
     * @param myname
2802
     * @return
2803
     */
2804
    private String extractAuthorFromNames(Rank rank, String name, HashMap<String, String> atomisedMap, MyName myname) {
2805
        logger.info("extractAuthorFromNames");
2806
        String fullName=name;
2807
        if (atomisedMap.get("dwc:scientificnameauthorship") == null && fullName!=null){
2808
            //            System.out.println("rank : "+rank.toString());
2809
            if(rank.isHigher(Rank.SPECIES())){
2810
                try{
2811
                    String author=null;
2812
                    if(atomisedMap.get("dwcranks:subgenus") != null) {
2813
                        author = fullName.split(atomisedMap.get("dwcranks:subgenus"))[1].trim();
2814
                    }
2815
                    if(atomisedMap.get("dwc:subgenus") != null) {
2816
                        author = fullName.split(atomisedMap.get("dwc:subgenus"))[1].trim();
2817
                    }
2818
                    if(author == null) {
2819
                        if(atomisedMap.get("dwc:genus") != null) {
2820
                            author = fullName.split(atomisedMap.get("dwc:genus"))[1].trim();
2821
                        }
2822
                    }
2823
                    if(author != null){
2824
                        fullName = fullName.substring(0, fullName.indexOf(author));
2825
                        author=author.replaceAll(",","").trim();
2826
                        myname.setAuthor(author);
2827
                    }
2828
                }catch(Exception e){
2829
                    //could not extract the author
2830
                }
2831
            }
2832
            if(rank.equals(Rank.SPECIES())){
2833
                try{
2834
                    String author=null;
2835
                    if(author == null) {
2836
                        if(atomisedMap.get("dwc:species") != null) {
2837
                            String[] t = fullName.split(atomisedMap.get("dwc:species"));
2838
                            //                            System.out.println("NB ELEMENTS "+t.length +"fullName "+fullName+", "+atomisedMap.get("dwc:species"));
2839
                            author = fullName.split(atomisedMap.get("dwc:species"))[1].trim();
2840
                            //                            System.out.println("AUTEUR "+author);
2841
                        }
2842
                    }
2843
                    if(author != null){
2844
                        fullName = fullName.substring(0, fullName.indexOf(author));
2845
                        author=author.replaceAll(",","").trim();
2846
                        myname.setAuthor(author);
2847
                    }
2848
                }catch(Exception e){
2849
                    //could not extract the author
2850
                }
2851
            }
2852
        }else{
2853
            myname.setAuthor(atomisedMap.get("dwc:scientificnameauthorship"));
2854
        }
2855
        return fullName;
2856
    }
2857

    
2858
    /**
2859
     * @param newName
2860
     * @param atomisedMap
2861
     * @param myname
2862
     */
2863
    private void createAtomisedTaxonString(String newName, HashMap<String, String> atomisedMap, MyName myname) {
2864
        logger.info("createAtomisedTaxonString "+atomisedMap);
2865
        if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
2866
            myname.setFamilyStr(atomisedMap.get("dwc:family"));
2867
        }
2868
        if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY())){
2869
            myname.setSubfamilyStr(atomisedMap.get("dwcranks:subfamily"));
2870
        }
2871
        if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
2872
            myname.setTribeStr(atomisedMap.get("dwcranks:tribe"));
2873
        }
2874
        if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
2875
            myname.setSubtribeStr(atomisedMap.get("dwcranks:subtribe"));
2876
        }
2877
        if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
2878
            myname.setGenusStr(atomisedMap.get("dwc:genus"));
2879
        }
2880
        if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2881
            myname.setSubgenusStr(atomisedMap.get("dwcranks:subgenus"));
2882
        }
2883
        if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2884
            myname.setSubgenusStr(atomisedMap.get("dwc:subgenus"));
2885
        }
2886
        if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
2887
            String n=newName;
2888
            if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2889
                n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2890
                n=n.replace("subsp.","");
2891
            }
2892
            if(atomisedMap.get("dwc:subspecies") != null) {
2893
                n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2894
                n=n.replace("subsp.","");
2895
            }
2896
            if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2897
                n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2898
                n=n.replace("var.","");
2899
                n=n.replace("v.","");
2900
            }
2901
            if(atomisedMap.get("dwcranks:formepithet") != null) {
2902
                //TODO
2903
                System.out.println("TODO FORMA");
2904
                n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
2905
                n=n.replace("forma","");
2906
            }
2907
            n=n.trim();
2908
            String author = myname.getAuthor();
2909
            if(n.split(" ").length>2){
2910

    
2911
                String n2=n.split(" ")[0]+" "+n.split(" ")[1];
2912
                String a= "";
2913
                try{
2914
                    a=n.split(n2)[1].trim();
2915
                }catch(Exception e){
2916
                    logger.info("no author in "+n+"?");}
2917

    
2918
                myname.setAuthor(a);
2919
                //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
2920
                n=n2;
2921

    
2922
            }
2923

    
2924
            myname.setSpeciesStr(atomisedMap.get("dwc:species"));
2925
            myname.setAuthor(author);
2926
        }
2927
        if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
2928
            myname.setSubspeciesStr(atomisedMap.get("dwc:subspecies"));
2929
        }
2930
        if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
2931
            myname.setSubspeciesStr(atomisedMap.get("dwc:infraspecificepithet"));
2932
        }
2933
        if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
2934
            myname.setVarietyStr(atomisedMap.get("dwcranks:varietyepithet"));
2935
        }
2936
        if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
2937
            myname.setFormStr(atomisedMap.get("dwcranks:formepithet"));
2938
        }
2939
        if (atomisedMap.get(PUBLICATION_YEAR) != null){
2940
        	myname.setPublicationYear(Integer.valueOf(atomisedMap.get(PUBLICATION_YEAR)));
2941
        }
2942
    }
2943

    
2944
    /**
2945
     * @see #createUnparsedSynonymNew(Rank, String, HashMap, MyName)
2946
     * @param rank
2947
     * @param newName
2948
     * @param atomisedMap
2949
     * @param myname
2950
     */
2951
    private void createUnparsedSynonym(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
2952
        logger.info("createSynonym");
2953
        //System.out.println("createsynonym");
2954
        if(rank.equals(Rank.UNKNOWN_RANK())){
2955
            myname.setNotParsableTaxon(newName);
2956
        }else{
2957
	        if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY()) && rank.equals(Rank.FAMILY())){
2958
	            myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
2959
	        }
2960
	        if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY()) && rank.equals(Rank.SUBFAMILY())){
2961
	            myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
2962
	        }
2963
	        if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE()) && rank.equals(Rank.TRIBE())){
2964
	            myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
2965
	        }
2966
	        if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE()) && rank.equals(Rank.SUBTRIBE())){
2967
	            myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
2968
	        }
2969
	        if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS()) && rank.equals(Rank.GENUS())){
2970
	            myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
2971
	        }
2972
	        if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
2973
	            myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
2974
	        }
2975
	        if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
2976
	            myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
2977
	        }
2978
	        if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES()) && rank.equals(Rank.SPECIES())){
2979
	            String n=newName;
2980
	            if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2981
	                n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2982
	                n=n.replace("subsp.","");
2983
	            }
2984
	            if(atomisedMap.get("dwc:subspecies") != null) {
2985
	                n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2986
	                n=n.replace("subsp.","");
2987
	            }
2988
	            if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2989
	                n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2990
	                n=n.replace("var.","");
2991
	                n=n.replace("v.","");
2992
	            }
2993
	            if(atomisedMap.get("dwcranks:formepithet") != null) {
2994
	                //TODO
2995
	                //System.out.println("TODO FORMA");
2996
	                n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
2997
	                n=n.replace("forma","");
2998
	            }
2999
	            n=n.trim();
3000
	            String author = myname.getAuthor();
3001
	            if(n.split(" ").length>2){
3002

    
3003
	                String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3004
	                String a="";
3005
	                try{
3006
	                    a= n.split(n2)[1].trim();
3007
	                }catch(Exception e){logger.info("no author in "+n);}
3008
	                myname.setAuthor(a);
3009
	                //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3010
	                n=n2;
3011

    
3012
	            }
3013
	            Taxon species = myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank);
3014
	            myname.setSpecies(species);
3015
	            myname.setAuthor(author);
3016
	        }
3017
	        if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3018
	            myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3019
	        }
3020
	        if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3021
	            myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3022
	        }
3023
	        if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY()) && rank.equals(Rank.VARIETY())){
3024
	            myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3025
	        }
3026
	        if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM()) && rank.equals(Rank.FORM())){
3027
	            myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3028
	        }
3029
        }
3030

    
3031
    }
3032

    
3033

    
3034
    /**
3035
     * @param refMods
3036
     * @see #createUnparsedSynonym(Rank, String, HashMap, MyName)
3037
     * the original TaxonXImport extracted Synonyms by creating acc Taxa with partial names
3038
     * I (AM) do not understand this but don't want to destroy code which maybe works in some cases) there
3039
     * I created this switch for old
3040
     * for Spiders the new version is preferred
3041
     */
3042
    private void createUnparsedSynonymNew(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname, Reference refMods) {
3043
        logger.info("createSynonym");
3044

    
3045
        INonViralName nameToBeFilled = this.getNonViralNameAccNomenclature();
3046
        //System.out.println("createsynonym");
3047
        if(rank.equals(Rank.UNKNOWN_RANK())){
3048
            //TODO
3049
        	myname.setNotParsableTaxon(newName);
3050

    
3051
        	nameToBeFilled.setTitleCache(newName, true);
3052
        }else{
3053
        	if(atomisedMap.get("dwc:genus") != null ){
3054
    			nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwc:genus"));
3055
	        }
3056
        	if (rank.isSupraGeneric()){
3057
        		if (atomisedMap.get("dwcranks:subtribe") != null ){
3058
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subtribe"));
3059
    	        }else if (atomisedMap.get("dwcranks:subtribe") != null ){
3060
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subtribe"));
3061
    	        }else if (atomisedMap.get("dwcranks:tribe") != null ){
3062
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:tribe"));
3063
    	        }else if (atomisedMap.get("dwcranks:subfamily") != null ){
3064
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subfamily"));
3065
    	        }else if (atomisedMap.get("dwc:family") != null ){
3066
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwc:family"));
3067
        	    }else{
3068
        	    	logger.warn("Supra generic rank not yet handled or atomisation not available");
3069
        	    }
3070
        	}
3071
        	if (atomisedMap.get("dwcranks:subgenus") != null){
3072
        		nameToBeFilled.setInfraGenericEpithet(atomisedMap.get("dwcranks:subgenus"));
3073
        	}
3074
        	if (atomisedMap.get("dwc:subgenus") != null){
3075
        		nameToBeFilled.setInfraGenericEpithet(atomisedMap.get("dwc:subgenus"));
3076
        	}
3077
        	if (atomisedMap.get("dwc:species") != null){
3078
        		nameToBeFilled.setSpecificEpithet(atomisedMap.get("dwc:species"));
3079
        	}
3080
        	if (atomisedMap.get("dwcranks:formepithet") != null){
3081
        		nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwcranks:formepithet"));
3082
        	}else if (atomisedMap.get("dwcranks:varietyepithet") != null){
3083
        		nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwcranks:varietyepithet"));
3084
        	}else if (atomisedMap.get("dwc:infraspecificepithet") != null){
3085
        		nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwc:infraspecificepithet"));
3086
        	}else if (atomisedMap.get("dwc:subspecies") != null){
3087
        		nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwc:subspecies"));
3088
        	}
3089
            Reference sec = sourceUrlRef;
3090
            if(!state2.getConfig().doKeepOriginalSecundum()){
3091
                sec = state2.getConfig().getSecundum();
3092
            }
3093
        	Synonym syn = Synonym.NewInstance(nameToBeFilled, sec);
3094
//        	sourceHandler.addSource(refMods, syn);
3095
        	myname.setSyno(syn);
3096
        	myname.setSynonym(true);
3097
        }
3098
	}
3099

    
3100
    /**
3101
     * @param rank
3102
     * @param newName
3103
     * @param atomisedMap
3104
     * @param myname
3105
     */
3106
    private void createAtomisedTaxon(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
3107
        logger.info("createAtomisedTaxon "+atomisedMap);
3108
        if(rank.equals(Rank.UNKNOWN_RANK())){
3109
            myname.setNotParsableTaxon(newName);
3110
        }
3111
        else{
3112
            if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
3113
                myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
3114
            }
3115
            if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY())){
3116
                myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
3117
            }
3118
            if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
3119
                myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
3120
            }
3121
            if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
3122
                myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
3123
            }
3124
            if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
3125
                myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
3126
            }
3127
            if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3128
                myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
3129
            }
3130
            if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3131
                myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
3132
            }
3133
            if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
3134
                String n=newName;
3135
                if(atomisedMap.get("dwc:infraspecificepithet") != null) {
3136
                    n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
3137
                    n=n.replace("subsp.","");
3138
                }
3139
                if(atomisedMap.get("dwc:subspecies") != null) {
3140
                    n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
3141
                    n=n.replace("subsp.","");
3142
                }
3143
                if(atomisedMap.get("dwcranks:varietyepithet") != null) {
3144
                    n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
3145
                    n=n.replace("var.","");
3146
                    n=n.replace("v.","");
3147
                }
3148
                if(atomisedMap.get("dwcranks:formepithet") != null) {
3149
                    //TODO
3150
                    //System.out.println("TODO FORMA");
3151
                    n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3152
                    n=n.replace("forma","");
3153
                }
3154
                n=n.trim();
3155
                String author = myname.getAuthor();
3156
                if(n.split(" ").length>2){
3157
                    String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3158
                    String a="";
3159
                    try{
3160
                        a= n.split(n2)[1].trim();
3161
                    }catch(Exception e){logger.info("no author  in "+n);}
3162
                    myname.setAuthor(a);
3163
                    //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3164
                    n=n2;
3165

    
3166
                }
3167

    
3168
                myname.setSpecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank));
3169
                myname.setAuthor(author);
3170
            }
3171
            if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3172
                myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3173
            }
3174
            if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3175
                myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3176
            }
3177
            if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
3178
                myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3179
            }
3180
            if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
3181
                myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3182
            }
3183
        }
3184
    }
3185

    
3186
    /**
3187
     * @return
3188
     */
3189
    private boolean checkRankValidForImport(Rank currentRank) {
3190
        //logger.info("checkRankValidForImport");
3191
        return currentRank.isLower(state2.getConfig().getMaxRank()) || currentRank.equals(state2.getConfig().getMaxRank());
3192
    }
3193

    
3194

    
3195

    
3196
    /**
3197
     * @param classification2
3198
     */
3199
    public void updateClassification(Classification classification2) {
3200
        //logger.info("updateClassification");
3201
        classification = classification2;
3202
    }
3203

    
3204

    
3205

    
3206
    public class MyName {
3207
        /**
         * Creates an empty name holder.
         *
         * @param isSynonym initial value of the {@code isSynonym} flag
         */
        public MyName(boolean isSynonym) {
            this.isSynonym = isSynonym;
        }
3214

    
3215
        String originalName="";
3216
        String newName="";
3217
        Rank rank=Rank.UNKNOWN_RANK();
3218
        String identifier="";
3219
        String status="";
3220
        String author=null;
3221

    
3222
        TaxonName taxonName;
3223

    
3224
        Reference refMods ;
3225

    
3226
        Taxon family,subfamily,tribe,subtribe,genus,subgenus,species,subspecies, variety,form;
3227
        INonViralName familyName, subfamilyName, tribeName,subtribeName,genusName,subgenusName,speciesName,subspeciesName;
3228
        String familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr;
3229
        Integer publicationYear;
3230

    
3231

    
3232
		Taxon higherTaxa;
3233
        Rank higherRank;
3234
        private Taxon taxon;
3235
        private Synonym syno;
3236

    
3237
        /**
3238
         * @return the syno
3239
         */
3240
        public Synonym getSyno() {
3241
            return syno;
3242
        }
3243

    
3244
        @Override
3245
        public String toString(){
3246
            List<String> tot=new ArrayList<String>();
3247
            String[] n= {familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr};
3248
            for (String elt:n){
3249
                if (!StringUtils.isEmpty(elt)) {
3250
                    tot.add(elt);
3251
                } else {
3252
                    tot.add("*");
3253
                }
3254
            }
3255
            return StringUtils.join(tot," ");
3256
        }
3257
        /**
3258
         * @param syno the syno to set
3259
         */
3260
        public void setSyno(Synonym syno) {
3261
            this.syno = syno;
3262
        }
3263

    
3264
        boolean isSynonym=false;
3265

    
3266
        /**
3267
         * @return the isSynonym
3268
         */
3269
        public boolean isSynonym() {
3270
            return isSynonym;
3271
        }
3272

    
3273
        /**
3274
         * @param isSynonym the isSynonym to set
3275
         */
3276
        public void setSynonym(boolean isSynonym) {
3277
            this.isSynonym = isSynonym;
3278
        }
3279

    
3280
        public void setSource(Reference re){
3281
            refMods=re;
3282
        }
3283

    
3284
        /**
3285
         * @param string
3286
         */
3287
        public void setFormStr(String string) {
3288
            this.formStr=string;
3289

    
3290
        }
3291
        /**
3292
         * @param string
3293
         */
3294
        public void setVarietyStr(String string) {
3295
            this.varietyStr=string;
3296

    
3297
        }
3298
        /**
3299
         * @param string
3300
         */
3301
        public void setSubspeciesStr(String string) {
3302
            this.subspeciesStr=string;
3303

    
3304
        }
3305
        /**
3306
         * @param string
3307
         */
3308
        public void setSpeciesStr(String string) {
3309
            this.speciesStr=string;
3310

    
3311
        }
3312
        /**
3313
         * @param string
3314
         */
3315
        public void setSubgenusStr(String string) {
3316
            this.subgenusStr=string;
3317

    
3318
        }
3319
        /**
3320
         * @param string
3321
         */
3322
        public void setGenusStr(String string) {
3323
            this.genusStr=string;
3324

    
3325
        }
3326
        /**
3327
         * @param string
3328
         */
3329
        public void setSubtribeStr(String string) {
3330
            this.subtribeStr=string;
3331

    
3332
        }
3333
        /**
3334
         * @param string
3335
         */
3336
        public void setTribeStr(String string) {
3337
            this.tribeStr=string;
3338

    
3339
        }
3340
        /**
3341
         * @param string
3342
         */
3343
        public void setSubfamilyStr(String string) {
3344
            this.subfamilyStr=string;
3345

    
3346
        }
3347
        /**
3348
         * @param string
3349
         */
3350
        public void setFamilyStr(String string) {
3351
            this.familyStr=string;
3352

    
3353
        }
3354
        /**
3355
         * @return the familyStr
3356
         */
3357
        public String getFamilyStr() {
3358
            return familyStr;
3359
        }
3360
        /**
3361
         * @return the subfamilyStr
3362
         */
3363
        public String getSubfamilyStr() {
3364
            return subfamilyStr;
3365
        }
3366
        /**
3367
         * @return the tribeStr
3368
         */
3369
        public String getTribeStr() {
3370
            return tribeStr;
3371
        }
3372
        /**
3373
         * @return the subtribeStr
3374
         */
3375
        public String getSubtribeStr() {
3376
            return subtribeStr;
3377
        }
3378
        /**
3379
         * @return the genusStr
3380
         */
3381
        public String getGenusStr() {
3382
            return genusStr;
3383
        }
3384
        /**
3385
         * @return the subgenusStr
3386
         */
3387
        public String getSubgenusStr() {
3388
            return subgenusStr;
3389
        }
3390
        /**
3391
         * @return the speciesStr
3392
         */
3393
        public String getSpeciesStr() {
3394
            return speciesStr;
3395
        }
3396
        /**
3397
         * @return the subspeciesStr
3398
         */
3399
        public String getSubspeciesStr() {
3400
            return subspeciesStr;
3401
        }
3402
        /**
3403
         * @return the formStr
3404
         */
3405
        public String getFormStr() {
3406
            return formStr;
3407
        }
3408
        /**
3409
         * @return the varietyStr
3410
         */
3411
        public String getVarietyStr() {
3412
            return varietyStr;
3413
        }
3414

    
3415
        public Integer getPublicationYear() {
3416
			return publicationYear;
3417
		}
3418

    
3419
		public void setPublicationYear(Integer publicationYear) {
3420
			this.publicationYear = publicationYear;
3421
		}
3422

    
3423
        /**
3424
         * @param newName2
3425
         */
3426
        public void setNotParsableTaxon(String newName2) {
3427
            //takes too much time
3428
            //            List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3429

    
3430
            NomenclaturalStatusType statusType = null;
3431
            if (!getStatus().isEmpty()){
3432
                try {
3433
                    statusType = nomStatusString2NomStatus(getStatus());
3434
                } catch (UnknownCdmTypeException e) {
3435
                    addProblematicStatusToFile(getStatus());
3436
                    logger.warn("Problem with status");
3437
                }
3438
            }
3439
            List<TaxonBase> tmpList = new ArrayList<>();
3440

    
3441
            Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitleWithRestrictions(TaxonBase.class, newName2, MatchMode.BEGINNING, null, null, null, null, null);
3442
            tmpList.addAll(taxontest.getRecords());
3443

    
3444
            //logger.info("tmpList returned: "+tmpList.size());
3445

    
3446

    
3447
            INonViralName identicName = null;
3448
            boolean foundIdentic=false;
3449
            TaxonBase<?> tmpTaxonBase=null;
3450
            //            Taxon tmpPartial=null;
3451
            for (TaxonBase<?> tmpb:tmpList){
3452
                if(tmpb !=null){
3453
                    TaxonName tnb =  tmpb.getName();
3454
                    Rank crank=null;
3455
                    if (tnb != null){
3456
                        if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(newName2) ){
3457
                            crank =tnb.getRank();
3458
                            if (crank !=null && rank !=null){
3459
                                if (crank.equals(rank)){
3460
                                	identicName = tnb;
3461
                                	if (isSynonym && tmpb.isInstanceOf(Synonym.class) || !isSynonym && tmpb.isInstanceOf(Taxon.class)){
3462
                                		foundIdentic=true;
3463
                                		tmpTaxonBase=tmpb;
3464
                               			break;
3465
                                	}
3466
                                }
3467
                            }
3468
                        }
3469
                    }
3470
                }
3471
            }
3472
            boolean statusMatch=false;
3473
            boolean appendedMatch=false;
3474
            if(tmpTaxonBase !=null && foundIdentic){
3475
                statusMatch=compareStatus(tmpTaxonBase, statusType);
3476
                if (!getStatus().isEmpty() && ! (tmpTaxonBase.getAppendedPhrase() == null)) {
3477
                    appendedMatch=tmpTaxonBase.getAppendedPhrase().equals(getStatus());
3478
                }
3479
                if (getStatus().isEmpty() && tmpTaxonBase.getAppendedPhrase() == null) {
3480
                    appendedMatch=true;
3481
                }
3482

    
3483
            }
3484
            if ((tmpTaxonBase == null || !foundIdentic) ||  (tmpTaxonBase != null && !statusMatch) ||  (tmpTaxonBase != null && !appendedMatch && !statusMatch)){
3485

    
3486
            	INonViralName tnb;
3487
            	if (identicName == null){
3488
            		tnb = getNonViralNameAccNomenclature();
3489
            		tnb.setRank(rank);
3490

    
3491
	                if(statusType != null) {
3492
	                    tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3493
	                }
3494
	                if(StringUtils.isNotBlank(getStatus())) {
3495
	                    tnb.setAppendedPhrase(getStatus());
3496
	                }
3497
	                tnb.setTitleCache(newName2,true);
3498
	                tmpTaxonBase = findMatchingTaxon(tnb,refMods);
3499
	            }else{
3500
            		tnb = identicName;
3501
            	}
3502

    
3503
                if(tmpTaxonBase==null){
3504
                    tmpTaxonBase = isSynonym ? Synonym.NewInstance(tnb, refMods) : Taxon.NewInstance(tnb, refMods);
3505
                    if(!state2.getConfig().doKeepOriginalSecundum()) {
3506
                        tmpTaxonBase.setSec(state2.getConfig().getSecundum());
3507
                    }
3508
                    //tmptaxonbase.setSec(refMods);
3509
                    if(!isSynonym) {
3510
                        classification.addChildTaxon((Taxon)tmpTaxonBase, null, null);
3511
                        sourceHandler.addSource(refMods, (Taxon)tmpTaxonBase);
3512
                    }
3513
                }
3514
            }
3515

    
3516
            tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3517
            if (author != null) {
3518
                if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
3519
                    setLSID(getIdentifier(), tmpTaxonBase);
3520
                    importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3521
                    tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3522
                }
3523
            }
3524
            TaxonName tnb = CdmBase.deproxy(tmpTaxonBase.getName(), TaxonName.class);
3525

    
3526
            if(!isSynonym) {
3527
                this.taxon=(Taxon)tmpTaxonBase;
3528
            } else {
3529
                if (tmpTaxonBase instanceof Taxon){
3530
                	logger.warn("Incorrect status");
3531
                }
3532
            	this.syno=(Synonym)tmpTaxonBase;
3533
            }
3534

    
3535
            taxonName = tnb;
3536

    
3537
        }
3538

    
3539
        /**
3540
         *
3541
         */
3542
        public void buildTaxon() {
3543
            //System.out.println("BUILD TAXON");
3544
            logger.info("buildTaxon");
3545
            NomenclaturalStatusType statusType = null;
3546
            if (!getStatus().isEmpty()){
3547
            	status = getStatus();
3548
            	String newNameStatus = newNameStatus(status);
3549
            	if (newNameStatus != null){
3550
            		taxonName.setAppendedPhrase(newNameStatus);
3551
            	}else{
3552
            		try {
3553
            			statusType = nomStatusString2NomStatus(getStatus());
3554
            			taxonName.addStatus(NomenclaturalStatus.NewInstance(statusType));
3555
            		} catch (UnknownCdmTypeException e) {
3556
            			addProblematicStatusToFile(getStatus());
3557
            			logger.warn("Problem with status");
3558
            		}
3559
            	}
3560
            }
3561
            importer.getNameService().save(taxonName);
3562

    
3563
            TaxonBase<?> tmpTaxonBase;
3564
            if (!isSynonym) {
3565
                tmpTaxonBase =Taxon.NewInstance(taxonName, refMods); //sec set null
3566
            }
3567
            else {
3568
                tmpTaxonBase =Synonym.NewInstance(taxonName, refMods); //sec set null
3569
            }
3570
            boolean exist = false;
3571
            if (!isSynonym){
3572
	            for (TaxonNode node : classification.getAllNodes()){
3573
	                try{
3574
	                	Taxon nodeTaxon = node.getTaxon();
3575
	                	boolean titleMatches = nodeTaxon.getTitleCache().equalsIgnoreCase(tmpTaxonBase.getTitleCache());
3576
	                	boolean nomStatusMatches = compareStatus(node.getTaxon(), statusType);
3577
	                	boolean nodeNameReplaceable = checkNodeNameReplaceable(nodeTaxon, tmpTaxonBase);
3578
	                    if(titleMatches && nomStatusMatches) {
3579
	                    	if (!isSynonym) {
3580
	                    		tmpTaxonBase=CdmBase.deproxy(nodeTaxon, TaxonBase.class);
3581
	                            exist =true;
3582
	                        } else {
3583
	                            logger.info("Found the same name but from another type (taxon/synonym)");
3584
	                            TaxonName existingTnb = getTaxon().getName();
3585
                                tmpTaxonBase = Synonym.NewInstance(existingTnb, refMods);
3586
                                importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3587
                                exist =true;
3588
                            }
3589
	                    }else if (nodeNameReplaceable){
3590
	                    	nodeTaxon.setName(tmpTaxonBase.getName());
3591
	                    	tmpTaxonBase = nodeTaxon;
3592
	                    	exist = true;
3593
	                    }
3594
	                }catch(NullPointerException n){logger.warn(" A taxon is either null or its titlecache is null - ignore it?");}
3595
	            }
3596
            }
3597
            if (!exist){
3598

    
3599
                boolean insertAsExisting =false;
3600
                List<Taxon> existingTaxons=new ArrayList<Taxon>();
3601
                try {
3602
                    existingTaxons = getMatchingTaxa(taxonName);
3603
                } catch (Exception e1) {
3604
                    e1.printStackTrace();
3605
                }
3606
                double similarityScore=0.0;
3607
                double similarityAuthor=-1;
3608
                String author1="";
3609
                String author2="";
3610
                String t1="";
3611
                String t2="";
3612
                for (Taxon bestMatchingTaxon : existingTaxons){
3613
                    //System.out.println("tnbase "+taxonname.getTitleCache());
3614
                    //System.out.println("bestex "+bestMatchingTaxon.getTitleCache());
3615
                    if(taxonName.getAuthorshipCache()!=null) {
3616
                    	author1=taxonName.getAuthorshipCache();
3617
                    }
3618
                    try {
3619
                        if(bestMatchingTaxon.getName().getAuthorshipCache()!=null) {
3620
                            author2=bestMatchingTaxon.getName().getAuthorshipCache();
3621
                        }
3622
                    } catch (Exception e) {
3623
                        // TODO Auto-generated catch block
3624
                        e.printStackTrace();
3625
                    }
3626
                    try {
3627
                        t1=taxonName.getTitleCache();
3628
                        if (author1!=null && !StringUtils.isEmpty(author1)) {
3629
                            t1=t1.split(Pattern.quote(author1))[0];
3630
                        }
3631
                    } catch (Exception e) {
3632
                        // TODO Auto-generated catch block
3633
                        e.printStackTrace();
3634
                    }
3635
                    try {
3636
                        t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
3637
                        if (author2!=null && !StringUtils.isEmpty(author2)) {
3638
                            t2=t2.split(Pattern.quote(author2))[0];
3639
                        }
3640
                    } catch (Exception e) {
3641
                        // TODO Auto-generated catch block
3642
                        e.printStackTrace();
3643
                    }
3644

    
3645
                    similarityScore=similarity(t1.trim(), t2.trim());
3646
                    //System.out.println("taxonscore "+similarityScore);
3647
                    similarityAuthor=similarity(author1.trim(), author2.trim());
3648
                    //System.out.println("authorscore "+similarityAuthor);
3649
                    insertAsExisting = compareAndCheckTaxon(taxonName, refMods, similarityScore, bestMatchingTaxon, similarityAuthor);
3650
                    if(insertAsExisting) {
3651
                        tmpTaxonBase=bestMatchingTaxon;
3652
                        break;
3653
                    }
3654
                }
3655
                if ( !insertAsExisting ){
3656
                    if(!state2.getConfig().doKeepOriginalSecundum()) {
3657
                        tmpTaxonBase.setSec(state2.getConfig().getSecundum());
3658
                    }
3659

    
3660
                    //                    tmptaxonbase.setSec(refMods);
3661
                    if (taxonName.getRank().equals(state2.getConfig().getMaxRank())) {
3662
                        //System.out.println("****************************"+tmptaxonbase);
3663
                        if (!isSynonym) {
3664
                            classification.addChildTaxon((Taxon)tmpTaxonBase, refMods, null);
3665
                        }
3666
                    } else{
3667
                        hierarchy = new HashMap<>();
3668
                        //System.out.println("LOOK FOR PARENT "+taxonname.toString()+", "+tmptaxonbase.toString());
3669
                        if (!isSynonym){
3670
                            lookForParentNode(taxonName,(Taxon)tmpTaxonBase, refMods,this);
3671
                            //System.out.println("HIERARCHY "+hierarchy);
3672
                            Taxon parent = buildHierarchy();
3673
                            if(!taxonExistsInClassification(parent,(Taxon)tmpTaxonBase)){
3674
                                if(parent !=null) {
3675
                                    classification.addParentChild(parent, (Taxon)tmpTaxonBase, refMods, null);
3676
                                } else {
3677
                                    classification.addChildTaxon((Taxon)tmpTaxonBase, refMods, null);
3678
                                }
3679
                                importer.getClassificationService().saveOrUpdate(classification);
3680
                            }
3681
                        }
3682
                        //                        Set<TaxonNode> nodeList = classification.getAllNodes();
3683
                        //                        for(TaxonNode tn:nodeList) {
3684
                        //                            System.out.println(tn.getTaxon());
3685
                        //                        }
3686
                    }
3687
                }
3688
                importer.getClassificationService().saveOrUpdate(classification);
3689
                 if(isSynonym) {
3690
                    try{
3691
                        Synonym castTest=CdmBase.deproxy(tmpTaxonBase, Synonym.class);
3692
                    }catch(Exception e){
3693
                        TaxonName existingTnb = tmpTaxonBase.getName();
3694
                        Synonym castTest = Synonym.NewInstance(existingTnb, refMods);
3695
                        importer.getTaxonService().saveOrUpdate(castTest);
3696
                        tmpTaxonBase=CdmBase.deproxy(castTest, Synonym.class);
3697
                    }
3698
                }
3699
            }
3700
            if(!isSynonym) {
3701
                taxon=CdmBase.deproxy(tmpTaxonBase, Taxon.class);
3702
            } else {
3703
                syno=CdmBase.deproxy(tmpTaxonBase, Synonym.class);
3704
            }
3705

    
3706
        }
3707

    
3708
		private boolean checkNodeNameReplaceable(Taxon nodeTaxon, TaxonBase<?> newTaxon) {
3709
			//TODO preliminary check
3710
			if (newTaxon.isInstanceOf(Synonym.class)){
3711
				return false;
3712
			}
3713
			INonViralName nodeName = nodeTaxon.getName();
3714
			INonViralName newName = newTaxon.getName();
3715
			if (nodeTaxon.getName() == null ||  newName == null){
3716
				return false;
3717
			}
3718
			if (nodeTaxon.getDescriptions().size() > 0 || nodeName.getDescriptions().size() > 0 || nodeName.getTypeDesignations().size() > 0 ){
3719
				return false;
3720
			}
3721
			boolean compare = true;
3722
			for (NomenclaturalStatus status : newName.getStatus() ){
3723
				compare &= compareStatus(nodeTaxon, status.getType());
3724
			}
3725
			if (! compare){
3726
				return false;
3727
			}
3728

    
3729
			if (nodeName.getNameCache() != null && nodeName.getNameCache().equals(newName.getNameCache())){
3730
				if (nodeName.getNameCache().equals(nodeName.getTitleCache())){
3731
					if (newName.getNameCache().length() < newName.getTitleCache().length()){
3732
						logger.warn("We still need to check, if node was automatically created via hierarchy creation: " + nodeName.getNameCache());
3733
						return true;
3734
					}
3735
				}
3736
			}
3737

    
3738
			return false;
3739
		}
3740

    
3741
		/**
3742
         *
3743
         */
3744
        private Taxon buildHierarchy() {
3745
            logger.info("buildHierarchy");
3746
            Taxon higherTaxon = null;
3747
            //add the maxRank as a root
3748
            if(hierarchy.containsKey(state2.getConfig().getMaxRank())){
3749
                Taxon ct=hierarchy.get(state2.getConfig().getMaxRank());
3750
                if(!taxonExistsInClassification(higherTaxon, ct)) {
3751
                   classification.addChildTaxon(ct, refMods, null);
3752
                }
3753
                higherTaxon = hierarchy.get(state2.getConfig().getMaxRank());
3754
                //                return higherTaxon;
3755
            }
3756
            //add the relation to the highertaxon, except if the current rank to add IS the maxRank
3757

    
3758
            //TODO higher Ranks
3759

    
3760
            if(hierarchy.containsKey(Rank.FAMILY()) && !state2.getConfig().getMaxRank().equals(Rank.FAMILY())){
3761
                higherTaxon=saveAndGetHigherTaxon(Rank.FAMILY(),higherTaxon);
3762
            }
3763
            if(hierarchy.containsKey(Rank.SUBFAMILY()) && !state2.getConfig().getMaxRank().equals(Rank.SUBFAMILY())){
3764
                higherTaxon=saveAndGetHigherTaxon(Rank.SUBFAMILY(),higherTaxon);
3765
            }
3766
            if(hierarchy.containsKey(Rank.TRIBE())&& !state2.getConfig().getMaxRank().equals(Rank.TRIBE())){
3767
                higherTaxon=saveAndGetHigherTaxon(Rank.TRIBE(),higherTaxon);
3768
            }
3769
            if(hierarchy.containsKey(Rank.SUBTRIBE())&& !state2.getConfig().getMaxRank().equals(Rank.SUBTRIBE())){
3770
                higherTaxon=saveAndGetHigherTaxon(Rank.SUBTRIBE(),higherTaxon);
3771
            }
3772
            if(hierarchy.containsKey(Rank.GENUS())&& !state2.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3773
                higherTaxon=saveAndGetHigherTaxon(Rank.GENUS(),higherTaxon);
3774
            }
3775
            if(hierarchy.containsKey(Rank.SUBGENUS())&& !state2.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3776
                higherTaxon=saveAndGetHigherTaxon(Rank.SUBGENUS(),higherTaxon);
3777
            }
3778
            importer.getClassificationService().saveOrUpdate(classification);
3779
            return higherTaxon;
3780
        }
3781

    
3782
        private Taxon saveAndGetHigherTaxon(Rank r, Taxon higherTaxon){
3783
            Taxon ct=hierarchy.get(r);
3784
            if(!taxonExistsInClassification(higherTaxon,ct )) {
3785
                if(higherTaxon != null && ct!=null) {
3786
                    classification.addParentChild(higherTaxon, ct, refMods, null);
3787
                } else
3788
                    if(higherTaxon == null && ct !=null) {
3789
                        classification.addChildTaxon(ct, refMods, null);
3790
                }
3791
            }
3792
            return ct;
3793
        }
3794

    
3795
        private boolean taxonExistsInClassification(Taxon parent, Taxon child){
3796
            logger.info("taxonExistsInClassification");
3797
            //            System.out.println("LOOK IF TAXA EXIST "+parent+", "+child);
3798
            boolean found=false;
3799
            if(parent !=null){
3800
                for (TaxonNode p : classification.getAllNodes()){
3801
                    if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
3802
                        for (TaxonNode c : p.getChildNodes()) {
3803
                            if (c.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3804
                                found=true;
3805
                                break;
3806
                            }
3807
                        }
3808
                    }
3809
                }
3810
            }
3811
            else{
3812
                for (TaxonNode p : classification.getAllNodes()){
3813
                    if(p.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3814
                        found=true;
3815
                        break;
3816
                    }
3817
                }
3818
            }
3819
            //            System.out.println("LOOK IF TAXA EXIST? "+found);
3820
            return found;
3821
        }
3822
        /**
3823
         * @param nameToBeFilledTest
3824
         */
3825
        public void setParsedName(TaxonName nameToBeFilledTest) {
3826
            this.taxonName = TaxonName.castAndDeproxy(nameToBeFilledTest);
3827

    
3828
        }
3829
        //variety dwcranks:varietyEpithet
3830
        /**
3831
         * @return the author
3832
         */
3833
        public String getAuthor() {
3834
            return author;
3835
        }
3836
        /**
3837
         * @return
3838
         */
3839
        public Taxon getTaxon() {
3840
            return taxon;
3841
        }
3842
        /**
3843
         * @return
3844
         */
3845
        public TaxonName getTaxonName() {
3846
            return taxonName;
3847
        }
3848

    
3849
        /**
3850
         * @param findOrCreateTaxon
3851
         */
3852
        public void setForm(Taxon form) {
3853
            this.form=form;
3854

    
3855
        }
3856
        /**
3857
         * @param findOrCreateTaxon
3858
         */
3859
        public void setVariety(Taxon variety) {
3860
            this.variety=variety;
3861

    
3862
        }
3863
        /**
3864
         * @param string
3865
         * @return
3866
         */
3867
        @SuppressWarnings("rawtypes")
3868
        public Taxon findOrCreateTaxon(String partialname,String fullname, Rank rank, Rank globalrank) {
3869
            logger.info("findOrCreateTaxon");
3870
            sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
3871
            //takes too much time
3872
            //            List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3873
            //            logger.info("tmpList returned: "+tmpList.size());
3874

    
3875
            NomenclaturalStatusType statusType = null;
3876
            if (!getStatus().isEmpty()){
3877
                try {
3878
                    statusType = nomStatusString2NomStatus(getStatus());
3879
                } catch (UnknownCdmTypeException e) {
3880
                    addProblematicStatusToFile(getStatus());
3881
                    logger.warn("Problem with status");
3882
                }
3883
            }
3884

    
3885
            List<TaxonBase> tmpListFiltered = new ArrayList<TaxonBase>();
3886

    
3887
            Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitleWithRestrictions(TaxonBase.class, fullname, MatchMode.BEGINNING, null, null, null, null, null);
3888

    
3889
            tmpListFiltered.addAll(taxontest.getRecords());
3890
            taxontest = importer.getTaxonService().findByTitleWithRestrictions(TaxonBase.class, partialname, MatchMode.BEGINNING, null, null, null, null, null);
3891
            tmpListFiltered.addAll(taxontest.getRecords());
3892

    
3893
            //logger.info("tmpListFiltered returned: "+tmpListFiltered.size());
3894

    
3895
            boolean nameCorrected=false;
3896
            if (fullname.indexOf(partialname)<0) {
3897
                nameCorrected=true;
3898
            }
3899

    
3900
            boolean foundIdentic=false;
3901
            Taxon tmp=null;
3902
            for (TaxonBase tmpb:tmpListFiltered){
3903
                if(tmpb !=null){
3904
                    TaxonName tnb =  tmpb.getName();
3905
                    Rank crank=null;
3906
                    if (tnb != null){
3907
                         if(globalrank.equals(rank) || (globalrank.isLower(Rank.SPECIES()) && rank.equals(Rank.SPECIES()))){
3908
                            if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(fullname) ){
3909
                                crank =tnb.getRank();
3910
                                if (crank !=null && rank !=null){
3911
                                    if (crank.equals(rank)){
3912
                                        foundIdentic=true;
3913
                                        try{
3914
                                            tmp=(Taxon)tmpb;
3915
                                            break;
3916
                                        }catch(Exception e){
3917
                                            e.printStackTrace();
3918
                                        }
3919
                                    }
3920
                                }
3921
                            }
3922
                            if(nameCorrected){ //for corrected names such as Anochetus -- A. blf-pat
3923
                                if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
3924
                                    crank =tnb.getRank();
3925
                                    if (crank !=null && rank !=null){
3926
                                        if (crank.equals(rank)){
3927
                                            foundIdentic=true;
3928
                                            try{
3929
                                                tmp=(Taxon)tmpb;
3930
                                                break;
3931
                                            }catch(Exception e){
3932
                                                e.printStackTrace();
3933
                                            }
3934
                                        }
3935
                                    }
3936
                                }
3937
                            }
3938
                        }
3939
                        else{
3940
                            if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
3941
                                crank =tnb.getRank();
3942
                                if (crank !=null && rank !=null){
3943
                                    if (crank.equals(rank)){
3944
                                        foundIdentic=true;
3945
                                        try{
3946
                                            tmp=(Taxon)tmpb;
3947
                                            break;
3948
                                        }catch(Exception e){
3949
                                            e.printStackTrace();
3950
                                        }
3951
                                    }
3952
                                }
3953
                            }
3954
                        }
3955
                    }
3956
                }
3957
            }
3958
            boolean statusMatch=false;
3959
            boolean appendedMatch=false;
3960
            if(tmp !=null && foundIdentic){
3961
                statusMatch=compareStatus(tmp, statusType);
3962
                if (!getStatus().isEmpty() && ! (tmp.getAppendedPhrase() == null)) {
3963
                    appendedMatch=tmp.getAppendedPhrase().equals(getStatus());
3964
                }
3965
                if (getStatus().isEmpty() && tmp.getAppendedPhrase() == null) {
3966
                    appendedMatch=true;
3967
                }
3968

    
3969
            }
3970
            if ((tmp == null || !foundIdentic) ||  (tmp != null && !statusMatch) ||  (tmp != null && !appendedMatch && !statusMatch)){
3971

    
3972
                INonViralName tnb = getNonViralNameAccNomenclature();
3973
                tnb.setRank(rank);
3974

    
3975
                if(statusType != null) {
3976
                    tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3977
                }
3978
                if(StringUtils.isNotBlank(getStatus())) {
3979
                    tnb.setAppendedPhrase(getStatus());
3980
                }
3981

    
3982
                if(rank.equals(Rank.UNKNOWN_RANK())){
3983
                    tnb.setTitleCache(fullname, true);
3984
                    //                    tnb.setGenusOrUninomial(fullname);
3985
                }
3986
                if(rank.isHigher(Rank.GENUS())) {
3987
                    tnb.setGenusOrUninomial(partialname);
3988
                }
3989

    
3990
                if(rank.isHigher(Rank.SPECIES())) {
3991
                    tnb.setTitleCache(partialname, true);
3992
                }
3993

    
3994
                if (rank.equals(globalrank) && author != null) {
3995

    
3996
                    tnb.setCombinationAuthorship(findOrCreateAuthor(author));
3997
                    if (getIdentifier() !=null && !getIdentifier().isEmpty()){
3998
                        Taxon taxonLSID = getTaxonByLSID(getIdentifier());
3999
                        if (taxonLSID !=null) {
4000
                            tmp=taxonLSID;
4001
                        }
4002
                    }
4003
                }
4004

    
4005
                if(tmp == null){
4006
                    if (rank.equals(Rank.FAMILY())) {
4007
                        tmp = buildFamily(tnb);
4008
                    }
4009
                    if (rank.equals(Rank.SUBFAMILY())) {
4010
                        tmp = buildSubfamily(tnb);
4011
                    }
4012
                    if (rank.equals(Rank.TRIBE())) {
4013
                        tmp = buildTribe(tnb);
4014
                    }
4015
                    if (rank.equals(Rank.SUBTRIBE())) {
4016
                        tmp = buildSubtribe(tnb);
4017
                    }
4018
                    if (rank.equals(Rank.GENUS())) {
4019
                        tmp = buildGenus(partialname, tnb);
4020
                    }
4021

    
4022
                    if (rank.equals(Rank.SUBGENUS())) {
4023
                        tmp = buildSubgenus(partialname, tnb);
4024
                    }
4025
                    if (rank.equals(Rank.SPECIES())) {
4026
                        tmp = buildSpecies(partialname, tnb);
4027
                    }
4028

    
4029
                    if (rank.equals(Rank.SUBSPECIES())) {
4030
                        tmp = buildSubspecies(partialname, tnb);
4031
                    }
4032

    
4033
                    if (rank.equals(Rank.VARIETY())) {
4034
                        tmp = buildVariety(fullname, partialname, tnb);
4035
                    }
4036

    
4037
                    if (rank.equals(Rank.FORM())) {
4038
                        tmp = buildForm(fullname, partialname, tnb);
4039
                    }
4040
                    if (tmp != null){
4041
                    	TaxonXTreatmentExtractor.this.sourceHandler.addSource(refMods, tmp);
4042
                    }
4043

    
4044
                    importer.getClassificationService().saveOrUpdate(classification);
4045
                }
4046

    
4047
            }
4048

    
4049
            tmp = CdmBase.deproxy(tmp, Taxon.class);
4050
            if (rank.equals(globalrank) && author != null) {
4051
                if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
4052
                    setLSID(getIdentifier(), tmp);
4053
                    importer.getTaxonService().saveOrUpdate(tmp);
4054
                    tmp = CdmBase.deproxy(tmp, Taxon.class);
4055
                }
4056
            }
4057

    
4058
            this.taxon=tmp;
4059

    
4060
            return tmp;
4061
        }
4062

    
4063
        /**
4064
         * @param tnb
4065
         * @return
4066
         */
4067
        private Taxon buildSubfamily(INonViralName tnb) {
4068
            Taxon tmp;
4069
            //            tnb.generateTitle();
4070
            tmp = findMatchingTaxon(tnb,refMods);
4071
            if(tmp ==null){
4072
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4073
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4074
                    tmp.setSec(state2.getConfig().getSecundum());
4075
                }
4076
                //                tmp.setSec(refMods);
4077
                //                sourceHandler.addSource(refMods, tmp);
4078
                if(family != null) {
4079
                    classification.addParentChild(family, tmp, null, null);
4080
                    higherRank=Rank.FAMILY();
4081
                    higherTaxa=family;
4082
                } else {
4083
                    //System.out.println("ADDCHILDTAXON SUBFAMILY "+tmp);
4084
                    classification.addChildTaxon(tmp, null, null);
4085
                }
4086
            }
4087
            return tmp;
4088
        }
4089
        /**
4090
         * @param tnb
4091
         * @return
4092
         */
4093
        private Taxon buildFamily(INonViralName tnb) {
4094
            Taxon tmp;
4095
            //            tnb.generateTitle();
4096
            tmp = findMatchingTaxon(tnb,refMods);
4097
            if(tmp ==null){
4098
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4099
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4100
                    tmp.setSec(state2.getConfig().getSecundum());
4101
                }
4102
                //                tmp.setSec(refMods);
4103
                //sourceHandler.addSource(refMods, tmp);
4104
                //System.out.println("ADDCHILDTAXON FAMILY "+tmp);
4105
                classification.addChildTaxon(tmp, null, null);
4106
            }
4107
            return tmp;
4108
        }
4109
        /**
4110
         * @param fullname
4111
         * @param tnb
4112
         * @return
4113
         */
4114
        private Taxon buildForm(String fullname, String partialname, INonViralName tnb) {
4115
            if (genusName !=null) {
4116
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4117
            }
4118
            if (subgenusName !=null) {
4119
                tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4120
            }
4121
            if(speciesName !=null) {
4122
                tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4123
            }
4124
            if(subspeciesName != null) {
4125
                tnb.setInfraSpecificEpithet(subspeciesName.getInfraSpecificEpithet());
4126
            }
4127
            if(partialname!= null) {
4128
                tnb.setInfraSpecificEpithet(partialname);
4129
            }
4130
             //TODO how to save form??
4131
            tnb.setTitleCache(fullname, true);
4132
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4133
            if(tmp ==null){
4134
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4135
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4136
                    tmp.setSec(state2.getConfig().getSecundum());
4137
                }
4138
                //                tmp.setSec(refMods);
4139
                //sourceHandler.addSource(refMods, tmp);
4140
                if (subspecies !=null) {
4141
                    classification.addParentChild(subspecies, tmp, null, null);
4142
                    higherRank=Rank.SUBSPECIES();
4143
                    higherTaxa=subspecies;
4144
                } else {
4145
                    if (species !=null) {
4146
                        classification.addParentChild(species, tmp, null, null);
4147
                        higherRank=Rank.SPECIES();
4148
                        higherTaxa=species;
4149
                    }
4150
                    else{
4151
                        //                        System.out.println("ADDCHILDTAXON FORM "+tmp);
4152
                        classification.addChildTaxon(tmp, null, null);
4153
                    }
4154
                }
4155
            }
4156
            return tmp;
4157
        }
4158
        /**
4159
         * @param fullname
4160
         * @param tnb
4161
         * @return
4162
         */
4163
        private Taxon buildVariety(String fullname, String partialname, INonViralName tnb) {
4164
            Taxon tmp;
4165
            if (genusName !=null) {
4166
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4167
            }
4168
            if (subgenusName !=null) {
4169
                tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4170
            }
4171
            if(speciesName !=null) {
4172
                tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4173
            }
4174
            if(subspeciesName != null) {
4175
                tnb.setInfraSpecificEpithet(subspeciesName.getSpecificEpithet());
4176
            }
4177
            if(partialname != null) {
4178
                tnb.setInfraSpecificEpithet(partialname);
4179
            }
4180
            //TODO how to save variety?
4181
            tnb.setTitleCache(fullname, true);
4182
            tmp = findMatchingTaxon(tnb,refMods);
4183
            if(tmp ==null){
4184
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4185
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4186
                    tmp.setSec(state2.getConfig().getSecundum());
4187
                }
4188
                //                tmp.setSec(refMods);
4189
                //sourceHandler.addSource(refMods, tmp);
4190
                if (subspecies !=null) {
4191
                    classification.addParentChild(subspecies, tmp, null, null);
4192
                    higherRank=Rank.SUBSPECIES();
4193
                    higherTaxa=subspecies;
4194
                } else {
4195
                    if(species !=null) {
4196
                        classification.addParentChild(species, tmp, null, null);
4197
                        higherRank=Rank.SPECIES();
4198
                        higherTaxa=species;
4199
                    }
4200
                    else{
4201
                        //System.out.println("ADDCHILDTAXON VARIETY "+tmp);
4202
                        classification.addChildTaxon(tmp, null, null);
4203
                    }
4204
                }
4205
            }
4206
            return tmp;
4207
        }
4208
        /**
4209
         * @param partialname
4210
         * @param tnb
4211
         * @return
4212
         */
4213
        private Taxon buildSubspecies(String partialname, INonViralName tnb) {
4214
            if (genusName !=null) {
4215
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4216
            }
4217
            if (subgenusName !=null) {
4218
                //                            System.out.println("SUB:"+subgenusName.getInfraGenericEpithet());
4219
                tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4220
            }
4221
            if(speciesName !=null) {
4222
                //                            System.out.println("SPE:"+speciesName.getSpecificEpithet());
4223
                tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4224
            }
4225
            tnb.setInfraSpecificEpithet(partialname);
4226
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4227
            if(tmp ==null){
4228
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4229
                if(!state2.getConfig().doKeepOriginalSecundum())
4230
                 {
4231
                    tmp.setSec(state2.getConfig().getSecundum());
4232
                //                tmp.setSec(refMods);
4233
                //sourceHandler.addSource(refMods, tmp);
4234
                }
4235

    
4236
                if(species != null) {
4237
                    classification.addParentChild(species, tmp, null, null);
4238
                    higherRank=Rank.SPECIES();
4239
                    higherTaxa=species;
4240
                }
4241
                else{
4242
                    //System.out.println("ADDCHILDTAXON SUBSPECIES "+tmp);
4243
                    classification.addChildTaxon(tmp, null, null);
4244
                }
4245
            }
4246
            return tmp;
4247
        }
4248
        /**
4249
         * @param partialname
4250
         * @param tnb
4251
         * @return
4252
         */
4253
        private Taxon buildSpecies(String partialname, INonViralName tnb) {
4254
            if (genusName !=null) {
4255
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4256
            }
4257
            if (subgenusName !=null) {
4258
                tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4259
            }
4260
            tnb.setSpecificEpithet(partialname.toLowerCase());
4261
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4262
            if(tmp ==null){
4263
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4264
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4265
                    tmp.setSec(state2.getConfig().getSecundum());
4266
                }
4267
                //                tmp.setSec(refMods);
4268
                //sourceHandler.addSource(refMods, tmp);
4269
                if (subgenus !=null) {
4270
                    classification.addParentChild(subgenus, tmp, null, null);
4271
                    higherRank=Rank.SUBGENUS();
4272
                    higherTaxa=subgenus;
4273
                } else {
4274
                    if (genus !=null) {
4275
                        classification.addParentChild(genus, tmp, null, null);
4276
                        higherRank=Rank.GENUS();
4277
                        higherTaxa=genus;
4278
                    }
4279
                    else{
4280
                        //System.out.println("ADDCHILDTAXON SPECIES "+tmp);
4281
                        classification.addChildTaxon(tmp, null, null);
4282
                    }
4283
                }
4284
            }
4285
            return tmp;
4286
        }
4287
        /**
4288
         * @param partialname
4289
         * @param tnb
4290
         * @return
4291
         */
4292
        private Taxon buildSubgenus(String partialname, INonViralName tnb) {
4293
            tnb.setInfraGenericEpithet(partialname);
4294
            if (genusName !=null) {
4295
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4296
            }
4297
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4298
            if(tmp ==null){
4299
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4300
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4301
                    tmp.setSec(state2.getConfig().getSecundum());
4302
                }
4303
                //                tmp.setSec(refMods);
4304
                //sourceHandler.addSource(refMods, tmp);
4305
                if(genus != null) {
4306
                    classification.addParentChild(genus, tmp, null, null);
4307
                    higherRank=Rank.GENUS();
4308
                    higherTaxa=genus;
4309
                } else{
4310
                    //System.out.println("ADDCHILDTAXON SUBGENUS "+tmp);
4311
                    classification.addChildTaxon(tmp, null, null);
4312
                }
4313
            }
4314
            return tmp;
4315
        }
4316
        /**
4317
         * @param partialname
4318
         * @param tnb
4319
         * @return
4320
         */
4321
        private Taxon buildGenus(String partialname, INonViralName tnb) {
4322
            Taxon tmp;
4323
            tnb.setGenusOrUninomial(partialname);
4324

    
4325

    
4326
            tmp = findMatchingTaxon(tnb,refMods);
4327
            if(tmp ==null){
4328
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4329
                if(!state2.getConfig().doKeepOriginalSecundum())
4330
                 {
4331
                    tmp.setSec(state2.getConfig().getSecundum());
4332
                //                tmp.setSec(refMods);
4333
                //sourceHandler.addSource(refMods, tmp);
4334
                }
4335

    
4336
                if(subtribe != null) {
4337
                    classification.addParentChild(subtribe, tmp, null, null);
4338
                    higherRank=Rank.SUBTRIBE();
4339
                    higherTaxa=subtribe;
4340
                } else{
4341
                    if(tribe !=null) {
4342
                        classification.addParentChild(tribe, tmp, null, null);
4343
                        higherRank=Rank.TRIBE();
4344
                        higherTaxa=tribe;
4345
                    } else{
4346
                        if(subfamily !=null) {
4347
                            classification.addParentChild(subfamily, tmp, null, null);
4348
                            higherRank=Rank.SUBFAMILY();
4349
                            higherTaxa=subfamily;
4350
                        } else
4351
                            if(family !=null) {
4352
                                classification.addParentChild(family, tmp, null, null);
4353
                                higherRank=Rank.FAMILY();
4354
                                higherTaxa=family;
4355
                            }
4356
                            else{
4357
                                //System.out.println("ADDCHILDTAXON GENUS "+tmp);
4358
                                classification.addChildTaxon(tmp, null, null);
4359
                            }
4360
                    }
4361
                }
4362
            }
4363
            return tmp;
4364
        }
4365

    
4366
        /**
4367
         * @param tnb
4368
         * @return
4369
         */
4370
        private Taxon buildSubtribe(INonViralName tnb) {
4371
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4372
            if(tmp==null){
4373
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4374
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4375
                    tmp.setSec(state2.getConfig().getSecundum());
4376
                }
4377
                //                tmp.setSec(refMods);
4378
                //sourceHandler.addSource(refMods, tmp);
4379
                if(tribe != null) {
4380
                    classification.addParentChild(tribe, tmp, null, null);
4381
                    higherRank=Rank.TRIBE();
4382
                    higherTaxa=tribe;
4383
                } else{
4384
                    //System.out.println("ADDCHILDTAXON SUBTRIBE "+tmp);
4385
                    classification.addChildTaxon(tmp, null, null);
4386
                }
4387
            }
4388
            return tmp;
4389
        }
4390
        /**
4391
         * @param tnb
4392
         * @return
4393
         */
4394
        private Taxon buildTribe(INonViralName tnb) {
4395
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4396
            if(tmp==null){
4397
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4398
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4399
                    tmp.setSec(state2.getConfig().getSecundum());
4400
                }
4401
                //                tmp.setSec(refMods);
4402
                //sourceHandler.addSource(refMods, tmp);
4403
                if (subfamily !=null) {
4404
                    classification.addParentChild(subfamily, tmp, null, null);
4405
                    higherRank=Rank.SUBFAMILY();
4406
                    higherTaxa=subfamily;
4407
                } else {
4408
                    if(family != null) {
4409
                        classification.addParentChild(family, tmp, null, null);
4410
                        higherRank=Rank.FAMILY();
4411
                        higherTaxa=family;
4412
                    }
4413
                    else{
4414
                        //System.out.println("ADDCHILDTAXON TRIBE "+tmp);
4415
                        classification.addChildTaxon(tmp, null, null);
4416
                    }
4417
                }
4418
            }
4419
            return tmp;
4420
        }
4421

    
4422
        /**
4423
         * @param identifier2
4424
         * @return
4425
         */
4426
        @SuppressWarnings("rawtypes")
4427
        private Taxon getTaxonByLSID(String identifier) {
4428
            //logger.info("getTaxonByLSID");
4429
            //            boolean lsidok=false;
4430
            String id = identifier.split("__")[0];
4431
            //            String source = identifier.split("__")[1];
4432
            LSID lsid = null;
4433
            if (id.indexOf("lsid")>-1){
4434
                try {
4435
                    lsid = new LSID(id);
4436
                    //                    lsidok=true;
4437
                } catch (MalformedLSIDException e) {
4438
                    logger.warn("Malformed LSID");
4439
                }
4440
            }
4441
            if (lsid !=null){
4442
                List<Taxon> taxa = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
4443
                LSID currentlsid=null;
4444
                for (Taxon t:taxa){
4445
                    currentlsid = t.getLsid();
4446
                    if (currentlsid !=null){
4447
                        if (currentlsid.getLsid().equals(lsid.getLsid())){
4448
                            try{
4449
                                return t;
4450
                            }
4451
                            catch(Exception e){logger.warn("Exception occurred while comparing LSIDs "+e );}
4452
                        }
4453
                    }
4454
                }
4455
            }
4456
            return null;
4457
        }
4458
        /**
4459
         * @param author2
4460
         * @return
4461
         */
4462
        @SuppressWarnings("rawtypes")
4463
        private Person findOrCreateAuthor(String author2) {
4464
            //logger.info("findOrCreateAuthor");
4465
            List<UuidAndTitleCache<Person>> hiberPersons = importer.getAgentService().getPersonUuidAndTitleCache();
4466
            for (UuidAndTitleCache<Person> hibernateP:hiberPersons){
4467
                if(hibernateP.getTitleCache().equals(author2)) {
4468
                    AgentBase existing = importer.getAgentService().find(hibernateP.getUuid());
4469
                    return CdmBase.deproxy(existing, Person.class);
4470
                }
4471
            }
4472
            Person p = Person.NewInstance();
4473
            p.setTitleCache(author2,true);
4474
            importer.getAgentService().saveOrUpdate(p);
4475
            return CdmBase.deproxy(p, Person.class);
4476
        }
4477
        /**
4478
         * @param author the author to set
4479
         */
4480
        public void setAuthor(String author) {
4481
            this.author = author;
4482
        }
4483

    
4484
        /**
4485
         * @return the higherTaxa
4486
         */
4487
        public Taxon getHigherTaxa() {
4488
            return higherTaxa;
4489
        }
4490
        /**
4491
         * @param higherTaxa the higherTaxa to set
4492
         */
4493
        public void setHigherTaxa(Taxon higherTaxa) {
4494
            this.higherTaxa = higherTaxa;
4495
        }
4496
        /**
4497
         * @return the higherRank
4498
         */
4499
        public Rank getHigherRank() {
4500
            return higherRank;
4501
        }
4502
        /**
4503
         * @param higherRank the higherRank to set
4504
         */
4505
        public void setHigherRank(Rank higherRank) {
4506
            this.higherRank = higherRank;
4507
        }
4508
        public String getName(){
4509
            if (newName.isEmpty()) {
4510
                return originalName;
4511
            } else {
4512
                return newName;
4513
            }
4514

    
4515
        }
4516
        /**
4517
         * @return the fullName
4518
         */
4519
        public String getOriginalName() {
4520
            return originalName;
4521
        }
4522
        /**
4523
         * @param fullName the fullName to set
4524
         */
4525
        public void setOriginalName(String fullName) {
4526
            this.originalName = fullName;
4527
        }
4528
        /**
4529
         * @return the newName
4530
         */
4531
        public String getNewName() {
4532
            return newName;
4533
        }
4534
        /**
4535
         * @param newName the newName to set
4536
         */
4537
        public void setNewName(String newName) {
4538
            this.newName = newName;
4539
        }
4540
        /**
4541
         * @return the rank
4542
         */
4543
        public Rank getRank() {
4544
            return rank;
4545
        }
4546
        /**
4547
         * @param rank the rank to set
4548
         */
4549
        public void setRank(Rank rank) {
4550
            this.rank = rank;
4551
        }
4552
        /**
4553
         * @return the idenfitiger
4554
         */
4555
        public String getIdentifier() {
4556
            return identifier;
4557
        }
4558
        /**
4559
         * @param idenfitiger the idenfitiger to set
4560
         */
4561
        public void setIdentifier(String identifier) {
4562
            this.identifier = identifier;
4563
        }
4564
        /**
4565
         * @return the status
4566
         */
4567
        public String getStatus() {
4568
            if (status == null) {
4569
                return "";
4570
            }
4571
            return status;
4572
        }
4573
        /**
4574
         * @param status the status to set
4575
         */
4576
        public void setStatus(String status) {
4577
            this.status = status;
4578
        }
4579
        /**
4580
         * @return the family
4581
         */
4582
        public Taxon getFamily() {
4583
            return family;
4584
        }
4585
        /**
4586
         * @param family the family to set
4587
         */
4588
        @SuppressWarnings("rawtypes")
4589
        public void setFamily(Taxon family) {
4590
            this.family = family;
4591
            familyName = CdmBase.deproxy(family.getName());
4592
        }
4593
        /**
4594
         * @return the subfamily
4595
         */
4596
        public Taxon getSubfamily() {
4597
            return subfamily;
4598
        }
4599
        /**
4600
         * @param subfamily the subfamily to set
4601
         */
4602
        @SuppressWarnings("rawtypes")
4603
        public void setSubfamily(Taxon subfamily) {
4604
            this.subfamily = subfamily;
4605
            subfamilyName = CdmBase.deproxy(subfamily.getName());
4606
        }
4607
        /**
4608
         * @return the tribe
4609
         */
4610
        public Taxon getTribe() {
4611
            return tribe;
4612
        }
4613
        /**
4614
         * @param tribe the tribe to set
4615
         */
4616
        @SuppressWarnings("rawtypes")
4617
        public void setTribe(Taxon tribe) {
4618
            this.tribe = tribe;
4619
            tribeName = CdmBase.deproxy(tribe.getName());
4620
        }
4621
        /**
4622
         * @return the subtribe
4623
         */
4624
        public Taxon getSubtribe() {
4625
            return subtribe;
4626
        }
4627
        /**
4628
         * @param subtribe the subtribe to set
4629
         */
4630
        @SuppressWarnings("rawtypes")
4631
        public void setSubtribe(Taxon subtribe) {
4632
            this.subtribe = subtribe;
4633
            subtribeName =CdmBase.deproxy(subtribe.getName());
4634
        }
4635
        /**
4636
         * @return the genus
4637
         */
4638
        public Taxon getGenus() {
4639
            return genus;
4640
        }
4641
        /**
4642
         * @param genus the genus to set
4643
         */
4644
        @SuppressWarnings("rawtypes")
4645
        public void setGenus(Taxon genus) {
4646
            if (genus != null){
4647
	        	this.genus = genus;
4648
	            genusName = CdmBase.deproxy(genus.getName());
4649
            }
4650
        }
4651
        /**
4652
         * @return the subgenus
4653
         */
4654
        public Taxon getSubgenus() {
4655
            return subgenus;
4656
        }
4657
        /**
4658
         * @param subgenus the subgenus to set
4659
         */
4660
        @SuppressWarnings("rawtypes")
4661
        public void setSubgenus(Taxon subgenus) {
4662
            this.subgenus = subgenus;
4663
            subgenusName = CdmBase.deproxy(subgenus.getName());
4664
        }
4665
        /**
4666
         * @return the species
4667
         */
4668
        public Taxon getSpecies() {
4669
            return species;
4670
        }
4671
        /**
4672
         * @param species the species to set
4673
         */
4674
        public void setSpecies(Taxon species) {
4675
        	if (species != null){
4676
	            this.species = species;
4677
	            speciesName = CdmBase.deproxy(species.getName());
4678
        	}
4679
        }
4680
        /**
4681
         * @return the subspecies
4682
         */
4683
        public Taxon getSubspecies() {
4684
            return subspecies;
4685
        }
4686
        /**
4687
         * @param subspecies the subspecies to set
4688
         */
4689
        @SuppressWarnings("rawtypes")
4690
        public void setSubspecies(Taxon subspecies) {
4691
            this.subspecies = subspecies;
4692
            subspeciesName = CdmBase.deproxy(subspecies.getName());
4693

    
4694
        }
4695

    
4696

    
4697

    
4698
    }
4699

    
4700

    
4701
    /**
4702
     * @param status
4703
     */
4704
    private void addProblematicStatusToFile(String status) {
4705
        try{
4706
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "StatusUnknown_"+classification.getTitleCache()+".txt",true);
4707
            BufferedWriter out = new BufferedWriter(fstream);
4708
            out.write(status+"\n");
4709
            //Close the output stream
4710
            out.close();
4711
        }catch (Exception e){//Catch exception if any
4712
            System.err.println("Error: " + e.getMessage());
4713
        }
4714

    
4715
    }
4716

    
4717

    
4718

    
4719
    /**
4720
     * @param tnb
4721
     * @return
4722
     */
4723
    private Taxon findMatchingTaxon(INonViralName tnb, Reference refMods) {
4724
        logger.info("findMatchingTaxon");
4725
        Taxon tmp=null;
4726

    
4727
        refMods=CdmBase.deproxy(refMods, Reference.class);
4728
        boolean insertAsExisting =false;
4729
        List<Taxon> existingTaxa = new ArrayList<Taxon>();
4730
        try {
4731
            existingTaxa = getMatchingTaxa(TaxonName.castAndDeproxy(tnb));
4732
        } catch (Exception e1) {
4733
            // TODO Auto-generated catch block
4734
            e1.printStackTrace();
4735
        }
4736
        double similarityScore=0.0;
4737
        double similarityAuthor=-1;
4738
        String author1="";
4739
        String author2="";
4740
        String t1="";
4741
        String t2="";
4742
        for (Taxon bestMatchingTaxon : existingTaxa){
4743
            if (!existingTaxa.isEmpty() && state2.getConfig().isInteractWithUser() && !insertAsExisting) {
4744
                //                System.out.println("tnb "+tnb.getTitleCache());
4745
                //                System.out.println("ext "+bestMatchingTaxon.getTitleCache());
4746
                try {
4747
                    if(tnb.getAuthorshipCache()!=null) {
4748
                        author1=tnb.getAuthorshipCache();
4749
                    }
4750
                } catch (Exception e) {
4751
                    // TODO Auto-generated catch block
4752
                    e.printStackTrace();
4753
                }
4754
                try {
4755
                    if(bestMatchingTaxon.getName().getAuthorshipCache()!=null) {
4756
                        author2=bestMatchingTaxon.getName().getAuthorshipCache();
4757
                    }
4758
                } catch (Exception e) {
4759
                    // TODO Auto-generated catch block
4760
                    e.printStackTrace();
4761
                }
4762
                try {
4763
                    t1=tnb.getTitleCache().split("sec.")[0].trim();
4764
                    if (author1!=null && !StringUtils.isEmpty(author1)) {
4765
                        t1=t1.split(Pattern.quote(author1))[0];
4766
                    }
4767
                } catch (Exception e) {
4768
                    // TODO Auto-generated catch block
4769
                    e.printStackTrace();
4770
                }
4771
                try {
4772
                    t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
4773
                    if (author2!=null && !StringUtils.isEmpty(author2)) {
4774
                        t2=t2.split(Pattern.quote(author2))[0];
4775
                    }
4776
                } catch (Exception e) {
4777
                    // TODO Auto-generated catch block
4778
                    e.printStackTrace();
4779
                }
4780
                similarityScore=similarity(t1.trim(), t2.trim());
4781
                //                System.out.println("taxascore: "+similarityScore);
4782
                similarityAuthor=similarity(author1.trim(), author2.trim());
4783
                //                System.out.println("authorscore: "+similarityAuthor);
4784
                insertAsExisting = compareAndCheckTaxon(tnb, refMods, similarityScore, bestMatchingTaxon,similarityAuthor);
4785
            }
4786
            if(insertAsExisting) {
4787
                //System.out.println("KEEP "+bestMatchingTaxon.toString());
4788
                tmp=bestMatchingTaxon;
4789
                sourceHandler.addSource(refMods, tmp);
4790
                return tmp;
4791
            }
4792
        }
4793
        return tmp;
4794
    }
4795

    
4796

    
4797
    /**
4798
     * @param tnb
4799
     * @param refMods
4800
     * @param similarityScore
4801
     * @param bestMatchingTaxon
4802
     * @param similarityAuthor
4803
     * @return
4804
     */
4805
    private boolean compareAndCheckTaxon(INonViralName tnb, Reference refMods, double similarityScore,
4806
            Taxon bestMatchingTaxon, double similarityAuthor) {
4807
        //logger.info("compareAndCheckTaxon");
4808
        boolean insertAsExisting;
4809
        //        if (tnb.getTitleCache().split("sec.")[0].equalsIgnoreCase("Chenopodium") && bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium album")>-1) {
4810
        //            insertAsExisting=false;
4811
        //        } else{
4812
        //a small hack/automatisation for Chenopodium only
4813
        if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase("Chenopodium") &&
4814
                bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium L.")>-1) {
4815
            insertAsExisting=true;
4816
        } else {
4817
            insertAsExisting=askIfReuseBestMatchingTaxon(tnb, bestMatchingTaxon, refMods, similarityScore,similarityAuthor);
4818
        }
4819
        //        }
4820

    
4821
        logDecision(tnb, bestMatchingTaxon, insertAsExisting, refMods);
4822
        return insertAsExisting;
4823
    }
4824

    
4825
    /**
4826
     * @return
4827
     */
4828
    @SuppressWarnings("rawtypes")
4829
    private List<Taxon> getMatchingTaxa(TaxonName tnb) {
4830
        //logger.info("getMatchingTaxon");
4831
    	if (tnb.getTitleCache() == null){
4832
    		tnb.setTitleCache(tnb.toString(), tnb.isProtectedTitleCache());
4833
    	}
4834

    
4835
        Pager<TaxonBase> pager=importer.getTaxonService().findByTitleWithRestrictions(TaxonBase.class, tnb.getTitleCache().split("sec.")[0].trim(), MatchMode.BEGINNING, null, null, null, null, null);
4836
        List<TaxonBase>records = pager.getRecords();
4837

    
4838
        List<Taxon> existingTaxons = new ArrayList<Taxon>();
4839
        for (TaxonBase r:records){
4840
            try{
4841
                Taxon bestMatchingTaxon = (Taxon)r;
4842
                //                System.out.println("best: "+bestMatchingTaxon.getTitleCache());
4843
                if(compareTaxonNameLength(bestMatchingTaxon.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4844
                    existingTaxons.add(bestMatchingTaxon);
4845
                }
4846
            }catch(ClassCastException e){logger.warn("classcast exception, might be a synonym, ignore it");}
4847
        }
4848
        Taxon bmt = importer.getTaxonService().findBestMatchingTaxon(tnb.getTitleCache());
4849
        if (!existingTaxons.contains(bmt) && bmt!=null) {
4850
            if(compareTaxonNameLength(bmt.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4851
                existingTaxons.add(bmt);
4852
            }
4853
        }
4854
        return existingTaxons;
4855
    }
4856

    
4857
    /**
4858
     * Check if the found Taxon can reasonnably be the same
4859
     * example: with and without author should match, but the subspecies should not be suggested for a genus
4860
     * */
4861
    private boolean compareTaxonNameLength(String f, String o){
4862
        //logger.info("compareTaxonNameLength");
4863
        boolean lengthOk=false;
4864
        int sizeF = f.length();
4865
        int sizeO = o.length();
4866
        if (sizeO>=sizeF) {
4867
            lengthOk=true;
4868
        }
4869
        if(sizeF>sizeO) {
4870
            if (sizeF-sizeO>10) {
4871
                lengthOk=false;
4872
            } else {
4873
                lengthOk=true;
4874
            }
4875
        }
4876

    
4877
        //        System.out.println(lengthOk+": compare "+f+" ("+f.length()+") and "+o+" ("+o.length()+")");
4878
        return lengthOk;
4879
    }
4880

    
4881
    private double similarity(String s1, String s2) {
4882
        //logger.info("similarity");
4883
        //System.out.println("similarity *"+s1+"* vs. *"+s2+"*");
4884
        if(!StringUtils.isEmpty(s1) && !StringUtils.isEmpty(s2)){
4885
            String l1=s1.toLowerCase().trim();
4886
            String l2=s2.toLowerCase().trim();
4887
            if (l1.length() < l2.length()) { // s1 should always be bigger
4888
                String swap = l1; l1 = l2; l2 = swap;
4889
            }
4890
            int bigLen = l1.length();
4891
            if (bigLen == 0) { return 1.0; /* both strings are zero length */ }
4892
            return (bigLen - computeEditDistance(l1, l2)) / (double) bigLen;
4893
        }
4894
        else{
4895
            if(s1!=null && s2!=null){
4896
                if (s1.equalsIgnoreCase(s2)) {
4897
                    return 1;
4898
                }
4899
            }
4900
            return -1;
4901
        }
4902
    }
4903

    
4904
    private int computeEditDistance(String s1, String s2) {
4905
        //logger.info("computeEditDistance");
4906
        int[] costs = new int[s2.length() + 1];
4907
        for (int i = 0; i <= s1.length(); i++) {
4908
            int lastValue = i;
4909
            for (int j = 0; j <= s2.length(); j++) {
4910
                if (i == 0) {
4911
                    costs[j] = j;
4912
                } else {
4913
                    if (j > 0) {
4914
                        int newValue = costs[j - 1];
4915
                        if (s1.charAt(i - 1) != s2.charAt(j - 1)) {
4916
                            newValue = Math.min(Math.min(newValue, lastValue),
4917
                                    costs[j]) + 1;
4918
                        }
4919
                        costs[j - 1] = lastValue;
4920
                        lastValue = newValue;
4921
                    }
4922
                }
4923
            }
4924
            if (i > 0) {
4925
                costs[s2.length()] = lastValue;
4926
            }
4927
        }
4928
        return costs[s2.length()];
4929
    }
4930

    
4931
    // Parent taxa resolved by the handle*Hierarchy methods, keyed by the rank of the resolved parent.
    Map<Rank, Taxon> hierarchy = new HashMap<>();
4932
    /**
4933
     * @param taxonName
4934
     */
4935
    @SuppressWarnings("rawtypes")
4936
    public void lookForParentNode(INonViralName taxonName, Taxon tax, Reference ref, MyName myName) {
4937
        logger.info("lookForParentNode "+taxonName.getTitleCache()+" for "+myName.toString());
4938
        //System.out.println("LOOK FOR PARENT NODE "+taxonname.toString()+"; "+tax.toString()+"; "+taxonname.getRank());
4939
        INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
4940
        if (taxonName.getRank().equals(Rank.FORM())){
4941
            handleFormHierarchy(ref, myName, parser);
4942
        }
4943
        else if (taxonName.getRank().equals(Rank.VARIETY())){
4944
            handleVarietyHierarchy(ref, myName, parser);
4945
        }
4946
        else if (taxonName.getRank().equals(Rank.SUBSPECIES())){
4947
            handleSubSpeciesHierarchy(ref, myName, parser);
4948
        }
4949
        else if (taxonName.getRank().equals(Rank.SPECIES())){
4950
            handleSpeciesHierarchy(ref, myName, parser);
4951
        }
4952
        else if (taxonName.getRank().equals(Rank.SUBGENUS())){
4953
            handleSubgenusHierarchy(ref, myName, parser);
4954
        }
4955

    
4956
        if (taxonName.getRank().equals(Rank.GENUS())){
4957
            handleGenusHierarchy(ref, myName, parser);
4958
        }
4959
        if (taxonName.getRank().equals(Rank.SUBTRIBE())){
4960
            handleSubtribeHierarchy(ref, myName, parser);
4961
        }
4962
        if (taxonName.getRank().equals(Rank.TRIBE())){
4963
            handleTribeHierarchy(ref, myName, parser);
4964
        }
4965

    
4966
        if (taxonName.getRank().equals(Rank.SUBFAMILY())){
4967
            handleSubfamilyHierarchy(ref, myName, parser);
4968
        }
4969
    }
4970

    
4971
    /**
4972
     * @param ref
4973
     * @param myName
4974
     * @param parser
4975
     */
4976
    private void handleSubfamilyHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
4977
        System.out.println("handleSubfamilyHierarchy");
4978
        String parentStr = myName.getFamilyStr();
4979
        Rank r = Rank.FAMILY();
4980
        if(parentStr!=null){
4981

    
4982
            Taxon parent = null;
4983
            Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitleWithRestrictions(TaxonBase.class, parentStr, MatchMode.BEGINNING, null, null, null, null, null);
4984
            for(TaxonBase tb:taxontest.getRecords()){
4985
                try {
4986
                    if (tb.getName().getRank().equals(r)) {
4987
                        parent=CdmBase.deproxy(tb, Taxon.class);
4988
                    }
4989
                    break;
4990
                } catch (Exception e) {
4991
                    // TODO Auto-generated catch block
4992
                    e.printStackTrace();
4993
                }
4994
            }
4995
            if(parent == null) {
4996
                INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
4997
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
4998
                if(tmp ==null)
4999
                {
5000
                    parent=Taxon.NewInstance(parentNameName, ref);
5001
                    importer.getTaxonService().save(parent);
5002
                    parent = CdmBase.deproxy(parent, Taxon.class);
5003
                } else {
5004
                    parent=tmp;
5005
                }
5006
                lookForParentNode(parentNameName, parent, ref,myName);
5007

    
5008
            }
5009
            hierarchy.put(r,parent);
5010
        }
5011
    }
5012

    
5013
    /**
5014
     * @param ref
5015
     * @param myName
5016
     * @param parser
5017
     */
5018
    private void handleTribeHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5019
        String parentStr = myName.getSubfamilyStr();
5020
        Rank r = Rank.SUBFAMILY();
5021
        if (parentStr == null){
5022
            parentStr = myName.getFamilyStr();
5023
            r = Rank.FAMILY();
5024
        }
5025
        if(parentStr!=null){
5026
            INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5027
            Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5028
            //                    importer.getTaxonService().save(parent);
5029
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5030

    
5031
            boolean parentDoesNotExists = true;
5032
            for (TaxonNode p : classification.getAllNodes()){
5033
                if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5034
                    parentDoesNotExists = false;
5035
                    parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5036
                    break;
5037
                }
5038
            }
5039
            //                if(parentDoesNotExists) {
5040
            //                    importer.getTaxonService().save(parent);
5041
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5042
            //                    lookForParentNode(parentNameName, parent, ref,myName);
5043
            //                }
5044
            if(parentDoesNotExists) {
5045
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5046
                if(tmp ==null)
5047
                {
5048
                    parent=Taxon.NewInstance(parentNameName, ref);
5049
                    importer.getTaxonService().save(parent);
5050
                    parent = CdmBase.deproxy(parent, Taxon.class);
5051
                } else {
5052
                    parent=tmp;
5053
                }
5054
                lookForParentNode(parentNameName, parent, ref,myName);
5055

    
5056
            }
5057
            hierarchy.put(r,parent);
5058
        }
5059
    }
5060

    
5061
    /**
5062
     * @param ref
5063
     * @param myName
5064
     * @param parser
5065
     */
5066
    private void handleSubtribeHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5067
        String parentStr = myName.getTribeStr();
5068
        Rank r = Rank.TRIBE();
5069
        if (parentStr == null){
5070
            parentStr = myName.getSubfamilyStr();
5071
            r = Rank.SUBFAMILY();
5072
        }
5073
        if (parentStr == null){
5074
            parentStr = myName.getFamilyStr();
5075
            r = Rank.FAMILY();
5076
        }
5077
        if(parentStr!=null){
5078
            INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5079
            Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5080
            //                    importer.getTaxonService().save(parent);
5081
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5082

    
5083
            boolean parentDoesNotExists = true;
5084
            for (TaxonNode p : classification.getAllNodes()){
5085
                if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5086
                    parentDoesNotExists = false;
5087
                    parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5088

    
5089
                    break;
5090
                }
5091
            }
5092
            //                if(parentDoesNotExists) {
5093
            //                    importer.getTaxonService().save(parent);
5094
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5095
            //                    lookForParentNode(parentNameName, parent, ref,myName);
5096
            //                }
5097
            if(parentDoesNotExists) {
5098
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5099
                if(tmp ==null)
5100
                {
5101
                    parent=Taxon.NewInstance(parentNameName, ref);
5102
                    importer.getTaxonService().save(parent);
5103
                    parent = CdmBase.deproxy(parent, Taxon.class);
5104
                } else {
5105
                    parent=tmp;
5106
                }
5107
                lookForParentNode(parentNameName, parent, ref,myName);
5108

    
5109
            }
5110
            hierarchy.put(r,parent);
5111
        }
5112
    }
5113

    
5114
    /**
5115
     * @param ref
5116
     * @param myName
5117
     * @param parser
5118
     */
5119
    private void handleGenusHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5120
        String parentStr = myName.getSubtribeStr();
5121
        Rank r = Rank.SUBTRIBE();
5122
        if (parentStr == null){
5123
            parentStr = myName.getTribeStr();
5124
            r = Rank.TRIBE();
5125
        }
5126
        if (parentStr == null){
5127
            parentStr = myName.getSubfamilyStr();
5128
            r = Rank.SUBFAMILY();
5129
        }
5130
        if (parentStr == null){
5131
            parentStr = myName.getFamilyStr();
5132
            r = Rank.FAMILY();
5133
        }
5134
        if(parentStr!=null){
5135
            INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5136
            Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5137
            //                    importer.getTaxonService().save(parent);
5138
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5139

    
5140
            boolean parentDoesNotExist = true;
5141
            for (TaxonNode p : classification.getAllNodes()){
5142
                if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5143
                    //                        System.out.println(p.getTaxon().getUuid());
5144
                    //                        System.out.println(parent.getUuid());
5145
                    parentDoesNotExist = false;
5146
                    parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5147
                    break;
5148
                }
5149
            }
5150
            //                if(parentDoesNotExists) {
5151
            //                    importer.getTaxonService().save(parent);
5152
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5153
            //                    lookForParentNode(parentNameName, parent, ref,myName);
5154
            //                }
5155
            if(parentDoesNotExist) {
5156
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5157
                if(tmp ==null){
5158

    
5159
                    parent=Taxon.NewInstance(parentNameName, ref);
5160
                    importer.getTaxonService().save(parent);
5161
                    parent = CdmBase.deproxy(parent, Taxon.class);
5162
                } else {
5163
                    parent=tmp;
5164
                }
5165
                lookForParentNode(parentNameName, parent, ref,myName);
5166

    
5167
            }
5168
            hierarchy.put(r,parent);
5169
        }
5170
    }
5171

    
5172
    /**
5173
     * @param ref
5174
     * @param myName
5175
     * @param parser
5176
     */
5177
    private void handleSubgenusHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5178
        String parentStr = myName.getGenusStr();
5179
        Rank r = Rank.GENUS();
5180

    
5181
        if(parentStr==null){
5182
            parentStr = myName.getSubtribeStr();
5183
            r = Rank.SUBTRIBE();
5184
        }
5185
        if (parentStr == null){
5186
            parentStr = myName.getTribeStr();
5187
            r = Rank.TRIBE();
5188
        }
5189
        if (parentStr == null){
5190
            parentStr = myName.getSubfamilyStr();
5191
            r = Rank.SUBFAMILY();
5192
        }
5193
        if (parentStr == null){
5194
            parentStr = myName.getFamilyStr();
5195
            r = Rank.FAMILY();
5196
        }
5197
        if(parentStr!=null){
5198
            INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5199
            Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5200
            //                    importer.getTaxonService().save(parent);
5201
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5202

    
5203
            boolean parentDoesNotExists = true;
5204
            for (TaxonNode p : classification.getAllNodes()){
5205
                if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5206
                    //                        System.out.println(p.getTaxon().getUuid());
5207
                    //                        System.out.println(parent.getUuid());
5208
                    parentDoesNotExists = false;
5209
                    parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5210
                    break;
5211
                }
5212
            }
5213
            //                if(parentDoesNotExists) {
5214
            //                    importer.getTaxonService().save(parent);
5215
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5216
            //                    lookForParentNode(parentNameName, parent, ref,myName);
5217
            //                }
5218
            if(parentDoesNotExists) {
5219
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5220
                if(tmp ==null)
5221
                {
5222
                    parent=Taxon.NewInstance(parentNameName, ref);
5223
                    importer.getTaxonService().save(parent);
5224
                    parent = CdmBase.deproxy(parent, Taxon.class);
5225
                } else {
5226
                    parent=tmp;
5227
                }
5228
                lookForParentNode(parentNameName, parent, ref,myName);
5229

    
5230
            }
5231
            hierarchy.put(r,parent);
5232
        }
5233
    }
5234

    
5235
    /**
5236
     * @param ref
5237
     * @param myName
5238
     * @param parser
5239
     */
5240
    private void handleSpeciesHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5241
        String parentStr = myName.getSubgenusStr();
5242
        Rank r = Rank.SUBGENUS();
5243

    
5244
        if(parentStr==null){
5245
            parentStr = myName.getGenusStr();
5246
            r = Rank.GENUS();
5247
        }
5248

    
5249
        if(parentStr==null){
5250
            parentStr = myName.getSubtribeStr();
5251
            r = Rank.SUBTRIBE();
5252
        }
5253
        if (parentStr == null){
5254
            parentStr = myName.getTribeStr();
5255
            r = Rank.TRIBE();
5256
        }
5257
        if (parentStr == null){
5258
            parentStr = myName.getSubfamilyStr();
5259
            r = Rank.SUBFAMILY();
5260
        }
5261
        if (parentStr == null){
5262
            parentStr = myName.getFamilyStr();
5263
            r = Rank.FAMILY();
5264
        }
5265
        if(parentStr!=null){
5266
            Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5267
            //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5268
            hierarchy.put(r,parent);
5269
        }
5270
    }
5271

    
5272
    /**
5273
     * @param ref
5274
     * @param myName
5275
     * @param parser
5276
     */
5277
    private void handleSubSpeciesHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5278
        String parentStr = myName.getSpeciesStr();
5279
        Rank r = Rank.SPECIES();
5280

    
5281

    
5282
        if(parentStr==null){
5283
            parentStr = myName.getSubgenusStr();
5284
            r = Rank.SUBGENUS();
5285
        }
5286

    
5287
        if(parentStr==null){
5288
            parentStr = myName.getGenusStr();
5289
            r = Rank.GENUS();
5290
        }
5291

    
5292
        if(parentStr==null){
5293
            parentStr = myName.getSubtribeStr();
5294
            r = Rank.SUBTRIBE();
5295
        }
5296
        if (parentStr == null){
5297
            parentStr = myName.getTribeStr();
5298
            r = Rank.TRIBE();
5299
        }
5300
        if (parentStr == null){
5301
            parentStr = myName.getSubfamilyStr();
5302
            r = Rank.SUBFAMILY();
5303
        }
5304
        if (parentStr == null){
5305
            parentStr = myName.getFamilyStr();
5306
            r = Rank.FAMILY();
5307
        }
5308
        if(parentStr!=null){
5309
            Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5310
            //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5311
            hierarchy.put(r,parent);
5312
        }
5313
    }
5314

    
5315

    
5316
    /**
5317
     * @param ref
5318
     * @param myName
5319
     * @param parser
5320
     */
5321
    private void handleFormHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5322
        String parentStr = myName.getSubspeciesStr();
5323
        Rank r = Rank.SUBSPECIES();
5324

    
5325

    
5326
        if(parentStr==null){
5327
            parentStr = myName.getSpeciesStr();
5328
            r = Rank.SPECIES();
5329
        }
5330

    
5331
        if(parentStr==null){
5332
            parentStr = myName.getSubgenusStr();
5333
            r = Rank.SUBGENUS();
5334
        }
5335

    
5336
        if(parentStr==null){
5337
            parentStr = myName.getGenusStr();
5338
            r = Rank.GENUS();
5339
        }
5340

    
5341
        if(parentStr==null){
5342
            parentStr = myName.getSubtribeStr();
5343
            r = Rank.SUBTRIBE();
5344
        }
5345
        if (parentStr == null){
5346
            parentStr = myName.getTribeStr();
5347
            r = Rank.TRIBE();
5348
        }
5349
        if (parentStr == null){
5350
            parentStr = myName.getSubfamilyStr();
5351
            r = Rank.SUBFAMILY();
5352
        }
5353
        if (parentStr == null){
5354
            parentStr = myName.getFamilyStr();
5355
            r = Rank.FAMILY();
5356
        }
5357
        if(parentStr!=null){
5358
            Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5359
            //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5360
            hierarchy.put(r,parent);
5361
        }
5362
    }
5363

    
5364
    /**
5365
     * @param ref
5366
     * @param myName
5367
     * @param parser
5368
     */
5369
    private void handleVarietyHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5370
        String parentStr = myName.getSubspeciesStr();
5371
        Rank r = Rank.SUBSPECIES();
5372

    
5373
        if(parentStr==null){
5374
            parentStr = myName.getSpeciesStr();
5375
            r = Rank.SPECIES();
5376
        }
5377

    
5378
        if(parentStr==null){
5379
            parentStr = myName.getSubgenusStr();
5380
            r = Rank.SUBGENUS();
5381
        }
5382

    
5383
        if(parentStr==null){
5384
            parentStr = myName.getGenusStr();
5385
            r = Rank.GENUS();
5386
        }
5387

    
5388
        if(parentStr==null){
5389
            parentStr = myName.getSubtribeStr();
5390
            r = Rank.SUBTRIBE();
5391
        }
5392
        if (parentStr == null){
5393
            parentStr = myName.getTribeStr();
5394
            r = Rank.TRIBE();
5395
        }
5396
        if (parentStr == null){
5397
            parentStr = myName.getSubfamilyStr();
5398
            r = Rank.SUBFAMILY();
5399
        }
5400
        if (parentStr == null){
5401
            parentStr = myName.getFamilyStr();
5402
            r = Rank.FAMILY();
5403
        }
5404
        if(parentStr!=null){
5405
            Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5406
            //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5407
            hierarchy.put(r,parent);
5408
        }
5409
    }
5410

    
5411
    /**
5412
     * @param ref
5413
     * @param myName
5414
     * @param parser
5415
     * @param parentStr
5416
     * @param r
5417
     * @return
5418
     */
5419
    private Taxon handleParentName(Reference ref, MyName myName, INonViralNameParser<?> parser, String parentStr, Rank r) {
5420
        INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5421
        Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5422
        //                    importer.getTaxonService().save(parent);
5423
        //                    parent = CdmBase.deproxy(parent, Taxon.class);
5424

    
5425
        boolean parentDoesNotExists = true;
5426
        for (TaxonNode p : classification.getAllNodes()){
5427
            if(p.getTaxon().getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(parent.getTitleCache().split("sec.")[0].trim())) {
5428
                //                        System.out.println(p.getTaxon().getUuid());
5429
                //                        System.out.println(parent.getUuid());
5430
                parentDoesNotExists = false;
5431
                parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5432
                break;
5433
            }
5434
        }
5435
        if(parentDoesNotExists) {
5436
            Taxon tmp = findMatchingTaxon(parentNameName,ref);
5437
            //                    System.out.println("FOUND PARENT "+tmp.toString()+" for "+parentNameName.toString());
5438
            if(tmp ==null){
5439

    
5440
                parent=Taxon.NewInstance(parentNameName, ref);
5441
                importer.getTaxonService().save(parent);
5442

    
5443
            } else {
5444
                parent=tmp;
5445
            }
5446
            lookForParentNode(parentNameName, parent, ref,myName);
5447

    
5448
        }
5449
        return parent;
5450
    }
5451

    
5452
    private void addNameDifferenceToFile(String originalname, String atomisedname){
5453
        try{
5454
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NamesDifferent_"+classification.getTitleCache()+".txt",true);
5455
            BufferedWriter out = new BufferedWriter(fstream);
5456
            out.write(originalname+" (original) versus "+replaceNull(atomisedname)+" (atomised) \n");
5457
            //Close the output stream
5458
            out.close();
5459
        }catch (Exception e){//Catch exception if any
5460
            System.err.println("Error: " + e.getMessage());
5461
        }
5462
    }
5463
    /**
5464
     * @param name
5465
     * @param author
5466
     * @param nomenclaturalCode2
5467
     * @param rank
5468
     */
5469
    private void addProblemNameToFile(String name, String author, NomenclaturalCode nomenclaturalCode2, Rank rank) {
5470
        try{
5471
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NameNotParsed.txt",true);
5472
            BufferedWriter out = new BufferedWriter(fstream);
5473
            out.write(name+"\t"+replaceNull(author)+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\n");
5474
            //Close the output stream
5475
            out.close();
5476
        }catch (Exception e){//Catch exception if any
5477
            System.err.println("Error: " + e.getMessage());
5478
        }
5479
    }
5480

    
5481

    
5482
    /**
5483
     * @param tnb
5484
     * @param bestMatchingTaxon
5485
     * @param insertAsExisting
5486
     * @param refMods
5487
     */
5488
    private void logDecision(INonViralName tnb, Taxon bestMatchingTaxon, boolean insertAsExisting, Reference refMods) {
5489
        try{
5490
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "Decisions_"+classification.toString()+".txt", true);
5491
            BufferedWriter out = new BufferedWriter(fstream);
5492
            out.write(tnb.getTitleCache() + " sec. " + refMods + "\t" + bestMatchingTaxon.getTitleCache() + "\t" + insertAsExisting + "\n");
5493
            //Close the output stream
5494
            out.close();
5495
        }catch (Exception e){//Catch exception if any
5496
            System.err.println("Error: " + e.getMessage());
5497
        }
5498
    }
5499

    
5500

    
5501
    @SuppressWarnings("unused")
5502
    private String replaceNull(Object in){
5503
        if (in == null) {
5504
            return "";
5505
        }
5506
        if (in.getClass().equals(NomenclaturalCode.class)) {
5507
            return ((NomenclaturalCode)in).getTitleCache();
5508
        }
5509
        return in.toString();
5510
    }
5511

    
5512
    /**
5513
     * @param fullName
5514
     * @param nomenclaturalCode2
5515
     * @param rank
5516
     */
5517
    private void addProblemNameToFile(String type, String name, NomenclaturalCode nomenclaturalCode2, Rank rank, String problems) {
5518
        try{
5519
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NameNotParsed_"+classification.getTitleCache()+".txt",true);
5520
            BufferedWriter out = new BufferedWriter(fstream);
5521
            out.write(type+"\t"+name+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\t"+problems+"\n");
5522
            //Close the output stream
5523
            out.close();
5524
        }catch (Exception e){//Catch exception if any
5525
            System.err.println("Error: " + e.getMessage());
5526
        }
5527

    
5528
    }
5529

    
5530
}
5531

    
5532

    
5533

    
(8-8/9)