Project

General

Profile

Download (232 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
 * Copyright (C) 2013 EDIT
3
 * European Distributed Institute of Taxonomy
4
 * http://www.e-taxonomy.eu
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9
package eu.etaxonomy.cdm.io.taxonx2013;
10

    
11
import java.io.BufferedWriter;
12
import java.io.File;
13
import java.io.FileWriter;
14
import java.io.IOException;
15
import java.net.URI;
16
import java.util.ArrayList;
17
import java.util.Arrays;
18
import java.util.HashMap;
19
import java.util.List;
20
import java.util.Map;
21
import java.util.Set;
22
import java.util.UUID;
23
import java.util.regex.Matcher;
24
import java.util.regex.Pattern;
25

    
26
import javax.xml.transform.TransformerException;
27
import javax.xml.transform.TransformerFactoryConfigurationError;
28

    
29
import org.apache.commons.lang.StringUtils;
30
import org.apache.log4j.Logger;
31
import org.w3c.dom.Node;
32
import org.w3c.dom.NodeList;
33

    
34
import com.ibm.lsid.MalformedLSIDException;
35

    
36
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
37
import eu.etaxonomy.cdm.api.service.pager.Pager;
38
import eu.etaxonomy.cdm.model.agent.AgentBase;
39
import eu.etaxonomy.cdm.model.agent.Person;
40
import eu.etaxonomy.cdm.model.common.CdmBase;
41
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
42
import eu.etaxonomy.cdm.model.common.LSID;
43
import eu.etaxonomy.cdm.model.common.Language;
44
import eu.etaxonomy.cdm.model.description.Feature;
45
import eu.etaxonomy.cdm.model.description.FeatureNode;
46
import eu.etaxonomy.cdm.model.description.FeatureTree;
47
import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
48
import eu.etaxonomy.cdm.model.description.TaxonDescription;
49
import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
50
import eu.etaxonomy.cdm.model.description.TextData;
51
import eu.etaxonomy.cdm.model.name.INonViralName;
52
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
53
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
54
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
55
import eu.etaxonomy.cdm.model.name.Rank;
56
import eu.etaxonomy.cdm.model.name.TaxonName;
57
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
58
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
59
import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
60
import eu.etaxonomy.cdm.model.reference.Reference;
61
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
62
import eu.etaxonomy.cdm.model.taxon.Classification;
63
import eu.etaxonomy.cdm.model.taxon.Synonym;
64
import eu.etaxonomy.cdm.model.taxon.SynonymType;
65
import eu.etaxonomy.cdm.model.taxon.Taxon;
66
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
67
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
68
import eu.etaxonomy.cdm.persistence.dto.UuidAndTitleCache;
69
import eu.etaxonomy.cdm.persistence.query.MatchMode;
70
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
71
import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
72
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
73
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImplRegExBase;
74

    
75
/**
76
 * @author pkelbert
77
 * @since 2 avr. 2013
78
 *
79
 */
80
public class TaxonXTreatmentExtractor extends TaxonXExtractor{
81

    
82
    private static final String PUBLICATION_YEAR = "publicationYear";
83

    
84
	private static final Logger logger = Logger.getLogger(TaxonXTreatmentExtractor.class);
85

    
86
    private static final String notMarkedUp = "Not marked-up";
87
    private static final UUID proIbioTreeUUID = UUID.fromString("2c49f506-c7f7-44de-a8b9-2e695de3769c");
88
    private static final UUID OtherUUID = UUID.fromString("6465f8aa-2175-446f-807e-7163994b120f");
89
    private static final UUID NotMarkedUpUUID = UUID.fromString("796fe3a5-2c9c-4a89-b298-7598ca944063");
90
    private static final boolean skippQuestion = true;
91

    
92
    private final NomenclaturalCode nomenclaturalCode;
93
    private Classification classification;
94

    
95
    private  String treatmentMainName,originalTreatmentName;
96

    
97
    private final HashMap<String,Map<String,String>> namesMap = new HashMap<String, Map<String,String>>();
98

    
99

    
100
    private final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
101
    private final Pattern keypatternend = Pattern.compile("^.+?\\d$");
102

    
103
    private boolean maxRankRespected =false;
104
    private Map<String, Feature> featuresMap;
105

    
106
    private MyName currentMyName;
107

    
108
    private Reference sourceUrlRef;
109

    
110
    private String followingText;  //text element immediately following a tax:name in tax:nomenclature TODO move do state
111
    private String usedFollowingTextPrefix; //the part of the following text which has been used during taxon name creation
112

    
113
    private final TaxonXAddSources sourceHandler = new TaxonXAddSources();
114

    
115
    /**
     * Creates an extractor, stores the given collaborators and configures the
     * internal source handler with the URL reference, importer and state.
     *
     * @param nomenclaturalCode the nomenclatural code kept by this extractor
     * @param classification the classification kept by this extractor
     * @param importer the importer; its agent service is passed to prepareCollectors
     * @param configState the import state, also passed to prepareCollectors and the source handler
     * @param featuresMap the features map kept by this extractor
     * @param urlSource the reference kept as source URL reference
     */
    public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification,
            TaxonXImport importer, TaxonXImportState configState, Map<String, Feature> featuresMap,
            Reference urlSource) {
        this.nomenclaturalCode = nomenclaturalCode;
        this.classification = classification;
        this.featuresMap = featuresMap;
        this.sourceUrlRef = urlSource;
        this.importer = importer;
        this.state2 = configState;

        prepareCollectors(configState, importer.getAgentService());

        sourceHandler.setSourceUrlRef(urlSource);
        sourceHandler.setImporter(importer);
        sourceHandler.setConfigState(configState);
    }
134

    
135
    /**
136
     * extracts all the treament information and save them
137
     * @param treatmentnode: the XML Node
138
     * @param tosave: the list of object to save into the CDM
139
     * @param refMods: the reference extracted from the MODS
140
     * @param sourceName: the URI of the document
141
     */
142
    @SuppressWarnings({ "rawtypes", "unused" })
143

    
144
    protected void extractTreatment(Node treatmentnode, Reference refMods, URI sourceName) {        logger.info("extractTreatment");
145
        List<TaxonName> namesToSave = new ArrayList<TaxonName>();
146
        NodeList children = treatmentnode.getChildNodes();
147
        Taxon acceptedTaxon =null;
148
        boolean hasRefgroup=false;
149

    
150
        //needed?
151
        for (int i=0;i<children.getLength();i++){
152
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group")) {
153
            	hasRefgroup=true;
154
            }
155
        }
156

    
157
        for (int i=0;i<children.getLength();i++){
158
        	Node child = children.item(i);
159
    		acceptedTaxon = handleSingleNode(refMods, sourceName, namesToSave, child, acceptedTaxon);
160
        }
161
        //        logger.info("saveUpdateNames");
162
        if (maxRankRespected){
163
            importer.getNameService().saveOrUpdate(namesToSave);
164
            importer.getClassificationService().saveOrUpdate(classification);
165
            //logger.info("saveUpdateNames-ok");
166
        }
167

    
168
        buildFeatureTree();
169
    }
170

    
171
	private Taxon handleSingleNode(Reference refMods, URI sourceName,
172
			List<TaxonName> namesToSave, Node child, Taxon acceptedTaxon) {
173
		Taxon defaultTaxon =null;
174

    
175
		String nodeName = child.getNodeName();
176
		if (nodeName.equalsIgnoreCase("tax:nomenclature")){
177
		    NodeList nomenclatureChildren = child.getChildNodes();
178
		    boolean containsName = false;
179
		    for(int k=0; k<nomenclatureChildren.getLength(); k++){
180
		        if(nomenclatureChildren.item(k).getNodeName().equalsIgnoreCase("tax:name")){
181
		            containsName=true;
182
		            break;
183
		        }
184
		    }
185
		    if (containsName){
186
		        reloadClassification();
187
		        //extract "main" the scientific name
188
		        try{
189
		            acceptedTaxon = extractNomenclature(child, namesToSave, refMods);
190
		        }catch(ClassCastException e){
191
		        	//FIXME exception handling
192
		        	e.printStackTrace();
193
		        }
194
		        //                    System.out.println("acceptedTaxon : "+acceptedTaxon);
195
		    }
196
		}else if (nodeName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
197
		    reloadClassification();
198
		    //extract the References within the document
199
		    extractReferences(child, namesToSave ,acceptedTaxon,refMods);
200
		}else if (nodeName.equalsIgnoreCase("tax:div") &&
201
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected){
202
		    File file = new File(TaxonXImport.LOG_FOLDER + "multipleTaxonX.txt");
203
		    FileWriter writer;
204
		    try {
205
		        writer = new FileWriter(file ,true);
206
		        writer.write(sourceName+"\n");
207
		        writer.flush();
208
		        writer.close();
209
		    } catch (IOException e1) {
210
		        // TODO Auto-generated catch block
211
		        logger.error(e1.getMessage());
212
		    }
213
		    //                String multiple = askMultiple(children.item(i));
214
		    String multiple = "Other";
215
		    if (multiple.equalsIgnoreCase("other")) {
216
		        extractSpecificFeatureNotStructured(child,acceptedTaxon, defaultTaxon,namesToSave, refMods,multiple);
217
		    }else if (multiple.equalsIgnoreCase("synonyms")) {
218
		        try{
219
		            extractSynonyms(child,acceptedTaxon, refMods, null);
220
		        }catch(NullPointerException e){
221
		            logger.warn("the accepted taxon is maybe null");
222
		        }
223
		    }else if(multiple.equalsIgnoreCase("material examined")){
224
		    	extractMaterials(child, acceptedTaxon, refMods, namesToSave);
225
		    }else if (multiple.equalsIgnoreCase("distribution")){
226
		    	extractDistribution(child, acceptedTaxon, defaultTaxon, namesToSave, refMods);
227
		    }else if (multiple.equalsIgnoreCase("type status")){
228
		    	extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, "TypeStatus");
229
		    }else if (multiple.equalsIgnoreCase("vernacular name")){
230
		    	extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
231
		    }else{
232
		    	extractSpecificFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,multiple);
233
		    }
234
		}
235
		else if(nodeName.equalsIgnoreCase("tax:div") &&
236
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected){
237
		    extractFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, Feature.BIOLOGY_ECOLOGY());
238
		}
239
		else if(nodeName.equalsIgnoreCase("tax:div") &&
240
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("vernacularName") && maxRankRespected){
241
		    extractDescriptionWithReference(child, acceptedTaxon,defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
242
		}
243
		else if(nodeName.equalsIgnoreCase("tax:div") &&
244
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected){
245
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
246
		}
247
		else if(nodeName.equalsIgnoreCase("tax:div") &&
248
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected){
249
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,Feature.DIAGNOSIS());
250
		}
251
		else if(nodeName.equalsIgnoreCase("tax:div") &&
252
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected){
253
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DISCUSSION());
254
		}
255
		else if(nodeName.equalsIgnoreCase("tax:div") &&
256
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("note") && maxRankRespected){
257
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
258
		}
259
		else if(nodeName.equalsIgnoreCase("tax:div") &&
260
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected){
261
		    extractDistribution(child,acceptedTaxon,defaultTaxon,namesToSave, refMods);
262
		}
263
		else if(nodeName.equalsIgnoreCase("tax:div") &&
264
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected){
265
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave,refMods,Feature.ETYMOLOGY());
266
		}
267
		else if(nodeName.equalsIgnoreCase("tax:div") &&
268
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected){
269
		    extractMaterials(child,acceptedTaxon, refMods, namesToSave);
270
		}
271
		else if(nodeName.equalsIgnoreCase("tax:figure") && maxRankRespected){
272
		    extractSpecificFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, "Figure");
273
		}
274
		else if(nodeName.equalsIgnoreCase("tax:div") &&
275
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") && maxRankRespected){
276
		    extractSpecificFeature(child, acceptedTaxon,defaultTaxon, namesToSave, refMods, "table");
277
		}else if(nodeName.equalsIgnoreCase("tax:div") &&
278
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected){
279
		    //TODO IGNORE keys for the moment
280
		    //extractKey(children.item(i),acceptedTaxon, nameToSave,source, refMods);
281
		    extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,"Keys - unparsed");
282
		}
283
		else{
284
		    if (! nodeName.equalsIgnoreCase("tax:pb")){
285
		        //logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
286
		        if (child.getAttributes() !=null) {
287
		            logger.info("First Attribute: " + child.getAttributes().item(0));
288
		        }
289
		        extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, notMarkedUp);
290
		    }else{
291
		    	//FIXME
292
		    	logger.warn("Unhandled");
293
		    }
294
		}
295
		return acceptedTaxon;
296
	}
297

    
298

    
299
    protected Map<String,Feature> getFeaturesUsed(){
300
        return featuresMap;
301
    }
302
    /**
303
     *
304
     */
305
    private void buildFeatureTree() {
306
        logger.info("buildFeatureTree");
307
        FeatureTree proibiospheretree = importer.getFeatureTreeService().find(proIbioTreeUUID);
308
        if (proibiospheretree == null){
309
            List<FeatureTree> trees = importer.getFeatureTreeService().list(FeatureTree.class, null, null, null, null);
310
            if (trees.size()==1) {
311
                FeatureTree<Feature> ft = trees.get(0);
312
                if (featuresMap==null) {
313
                    featuresMap=new HashMap<String, Feature>();
314
                }
315
                for (Feature feature: ft.getDistinctFeatures()){
316
                    if(feature!=null) {
317
                        featuresMap.put(feature.getTitleCache(), feature);
318
                    }
319
                }
320
            }
321
            proibiospheretree = FeatureTree.NewInstance();
322
            proibiospheretree.setUuid(proIbioTreeUUID);
323
        }
324
        //        FeatureNode root = proibiospheretree.getRoot();
325
        FeatureNode root2 = proibiospheretree.getRoot();
326
        if (root2 != null){
327
            int nbChildren = root2.getChildCount()-1;
328
            while (nbChildren>-1){
329
                try{
330
                    root2.removeChild(nbChildren);
331
                }catch(Exception e){logger.warn("Can't remove child from FeatureTree "+e);}
332
                nbChildren --;
333
            }
334

    
335
        }
336

    
337
        for (Feature feature:featuresMap.values()) {
338
            root2.addChild(FeatureNode.NewInstance(feature));
339
        }
340
        importer.getFeatureTreeService().saveOrUpdate(proibiospheretree);
341

    
342
    }
343

    
344

    
345
    /**
346
     * @param keys
347
     * @param acceptedTaxon: the current acceptedTaxon
348
     * @param nametosave: the list of objects to save into the CDM
349
     * @param refMods: the current reference extracted from the MODS
350
     */
351
    /*   @SuppressWarnings("rawtypes")
352
    private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonName> nametosave, Reference refMods) {
353
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
354

    
355
        NodeList children = keys.getChildNodes();
356
        String key="";
357
        PolytomousKey poly =  PolytomousKey.NewInstance();
358
        poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
359
        poly.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
360
        poly.addTaxonomicScope(acceptedTaxon);
361
        poly.setTitleCache("bloup", true);
362
        //        poly.addCoveredTaxon(acceptedTaxon);
363
        PolytomousKeyNode root = poly.getRoot();
364
        PolytomousKeyNode previous = null,tmpKey=null;
365
        Taxon taxonKey=null;
366
        List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
367

    
368
        //        String fullContent = keys.getTextContent();
369
        for (int i=0;i<children.getLength();i++){
370
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
371
                NodeList paragraph = children.item(i).getChildNodes();
372
                key="";
373
                taxonKey=null;
374
                for (int j=0;j<paragraph.getLength();j++){
375
                    if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
376
                        if (! paragraph.item(j).getTextContent().trim().isEmpty()){
377
                            key+=paragraph.item(j).getTextContent().trim();
378
                            //                            logger.info("KEY: "+j+"--"+key);
379
                        }
380
                    }
381
                    if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
382
                        taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
383
                    }
384
                }
385
                //                logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
386
                if (keypattern.matcher(key).matches()){
387
                    tmpKey = PolytomousKeyNode.NewInstance(key);
388
                    if (taxonKey!=null) {
389
                        tmpKey.setTaxon(taxonKey);
390
                    }
391
                    polyNodes.add(tmpKey);
392
                    if (previous == null) {
393
                        root.addChild(tmpKey);
394
                    } else {
395
                        previous.addChild(tmpKey);
396
                    }
397
                }else{
398
                    if (!key.isEmpty()){
399
                        tmpKey=PolytomousKeyNode.NewInstance(key);
400
                        if (taxonKey!=null) {
401
                            tmpKey.setTaxon(taxonKey);
402
                        }
403
                        polyNodes.add(tmpKey);
404
                        if (keypatternend.matcher(key).matches()) {
405
                            root.addChild(tmpKey);
406
                            previous=tmpKey;
407
                        } else{
408
                            previous.addChild(tmpKey);
409
                        }
410

    
411
                    }
412
                }
413
            }
414
        }
415
        importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
416
        importer.getPolytomousKeyService().saveOrUpdate(poly);
417
    }
418
*/
419

    
420

    
421
    /**
422
     * @param taxons: the XML Nodegroup
423
     * @param nametosave: the list of objects to save into the CDM
424
     * @param acceptedTaxon: the current accepted Taxon
425
     * @param refMods: the current reference extracted from the MODS
426
     *
427
     * @return Taxon object built
428
     */
429
    @SuppressWarnings({ "rawtypes", "unused" })
430
    private TaxonName getTaxonNameFromXML(Node taxons, List<TaxonName> nametosave, Reference refMods, boolean isSynonym) {
431
        //        logger.info("getTaxonFromXML");
432
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
433
        logger.info("getTaxonNameFromXML");
434
        TaxonName nameToBeFilled = null;
435

    
436
        currentMyName=new MyName(isSynonym);
437

    
438
        NomenclaturalStatusType statusType = null;
439
        try {
440
        	String followingText = null;  //needs to be checked if following text is possible
441
            currentMyName = extractScientificName(taxons,refMods, null);
442
        } catch (TransformerFactoryConfigurationError e1) {
443
            logger.warn(e1);
444
        } catch (TransformerException e1) {
445
            logger.warn(e1);
446
        }
447
        /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
448

    
449
        nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
450
        if (nameToBeFilled.hasProblem() &&
451
                !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
452
            //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
453
            addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
454
            nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
455
        }
456

    
457
        nameToBeFilled = getTaxonName(nameToBeFilled,nametosave,statusType);
458
         */
459
        nameToBeFilled = currentMyName.getTaxonName();
460
        return nameToBeFilled;
461

    
462
    }
463

    
464

    
465
    /**
466
     *
467
     */
468
    private void reloadClassification() {
469
        logger.info("reloadClassification");
470
        Classification cl = importer.getClassificationService().find(classification.getUuid());
471
        if (cl != null){
472
            classification = cl;
473
        }else{
474
            importer.getClassificationService().saveOrUpdate(classification);
475
            classification = importer.getClassificationService().find(classification.getUuid());
476
        }
477
    }
478

    
479
    //    /**
480
    //     * Create a Taxon for the current NameBase, based on the current reference
481
    //     * @param taxonName
482
    //     * @param refMods: the current reference extracted from the MODS
483
    //     * @return Taxon
484
    //     */
485
    //    @SuppressWarnings({ "unused", "rawtypes" })
486
    //    private Taxon getTaxon(TaxonName taxonName, Reference refMods) {
487
    //        Taxon t = new Taxon(taxonName,null );
488
    //        if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
489
    //            t.setSec(configState.getConfig().getSecundum());
490
    //            logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
491
    //        }
492
    //        /*<<<<<<< .courant
493
    //        boolean sourceExists=false;
494
    //        Set<IdentifiableSource> sources = t.getSources();
495
    //        for (IdentifiableSource src : sources){
496
    //            String micro = src.getCitationMicroReference();
497
    //            Reference r = src.getCitation();
498
    //            if (r.equals(refMods) && micro == null) {
499
    //                sourceExists=true;
500
    //            }
501
    //        }
502
    //        if(!sourceExists) {
503
    //            t.addSource(null,null,refMods,null);
504
    //        }
505
    //=======*/
506
    //        t.addSource(OriginalSourceType.Import,null,null,refMods,null);
507
    //        t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
508
    //        return t;
509
    //    }
510

    
511
    private void  extractDescriptionWithReference(Node typestatus, Taxon acceptedTaxon, Taxon defaultTaxon, Reference refMods,
512
            String featureName) {
513
        //        System.out.println("extractDescriptionWithReference !");
514
        logger.info("extractDescriptionWithReference");
515
        NodeList children = typestatus.getChildNodes();
516

    
517
        Feature currentFeature=getFeatureObjectFromString(featureName);
518

    
519
        String r="";String s="";
520
        for (int i=0;i<children.getLength();i++){
521
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
522
                s+=children.item(i).getTextContent().trim();
523
            }
524
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
525
                r+= children.item(i).getTextContent().trim();
526
            }
527
            if (s.indexOf(r)>-1) {
528
                s=s.split(r)[0];
529
            }
530
        }
531

    
532
        Reference currentref =  ReferenceFactory.newGeneric();
533
        if(!r.isEmpty()) {
534
            currentref.setTitleCache(r, true);
535
        } else {
536
            currentref=refMods;
537
        }
538
        setParticularDescription(s,acceptedTaxon,defaultTaxon, currentref, refMods,currentFeature);
539
    }
540

    
541
    /**
542
     * @param nametosave
543
     * @param distribution: the XML node group
544
     * @param acceptedTaxon: the current accepted Taxon
545
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
546
     * @param refMods: the current reference extracted from the MODS
547
     */
548
    @SuppressWarnings("rawtypes")
549
    private void extractDistribution(Node distribution, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonName> nametosave, Reference refMods) {
550
        logger.info("extractDistribution");
551
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
552
        NodeList children = distribution.getChildNodes();
553
        Map<Integer,List<MySpecimenOrObservation>> specimenOrObservations = new HashMap<Integer, List<MySpecimenOrObservation>>();
554
        Map<Integer,String> descriptionsFulltext = new HashMap<Integer,String>();
555

    
556
        for (int i=0;i<children.getLength();i++){
557
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
558
                NodeList paragraph = children.item(i).getChildNodes();
559
                for (int j=0;j<paragraph.getLength();j++){
560
                    if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
561
                        extractText(descriptionsFulltext, i, paragraph.item(j));
562
                    }
563
                    else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
564
                        extractInLine(nametosave, refMods, descriptionsFulltext, i,paragraph.item(j));
565
                    }
566
                    else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")){
567
                        MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
568
                        DerivedUnit derivedUnitBase = null;
569
                        specimenOrObservation = extractSpecimenOrObservation(paragraph.item(j), derivedUnitBase, SpecimenOrObservationType.DerivedUnit, null);
570
                        extractTextFromSpecimenOrObservation(specimenOrObservations, descriptionsFulltext, i, specimenOrObservation);
571
                    }
572
                }
573
            }
574
        }
575

    
576
        int m=0;
577
        for (int k:descriptionsFulltext.keySet()) {
578
            if (k>m) {
579
                m=k;
580
            }
581
        }
582
        for (int k:specimenOrObservations.keySet()) {
583
            if (k>m) {
584
                m=k;
585
            }
586
        }
587

    
588

    
589
        if(acceptedTaxon!=null){
590
            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
591
            Feature currentFeature = Feature.DISTRIBUTION();
592
            //        DerivedUnit derivedUnitBase=null;
593
            //        String descr="";
594
            for (int k=0;k<=m;k++){
595
                if(specimenOrObservations.keySet().contains(k)){
596
                    for (MySpecimenOrObservation soo:specimenOrObservations.get(k) ) {
597
                        handleAssociation(acceptedTaxon, refMods, td, soo);
598
                    }
599
                }
600

    
601
                if (descriptionsFulltext.keySet().contains(k)){
602
                    if (!stringIsEmpty(descriptionsFulltext.get(k).trim()) && (descriptionsFulltext.get(k).startsWith("Hab.") || descriptionsFulltext.get(k).startsWith("Habitat"))){
603
                        setParticularDescription(descriptionsFulltext.get(k),acceptedTaxon,defaultTaxon, refMods, Feature.HABITAT());
604
                        break;
605
                    }
606
                    else{
607
                        handleTextData(refMods, descriptionsFulltext, td, currentFeature, k);
608
                    }
609
                }
610

    
611
                if (descriptionsFulltext.keySet().contains(k) || specimenOrObservations.keySet().contains(k)){
612
                    acceptedTaxon.addDescription(td);
613
                    sourceHandler.addAndSaveSource(refMods, td, null);
614
                    importer.getTaxonService().saveOrUpdate(acceptedTaxon);
615
                }
616
            }
617
        }
618
    }
619

    
620
    /**
621
     * @param refMods
622
     * @param descriptionsFulltext
623
     * @param td
624
     * @param currentFeature
625
     * @param k
626
     */
627
    private void handleTextData(Reference refMods, Map<Integer, String> descriptionsFulltext, TaxonDescription td,
628
            Feature currentFeature, int k) {
629
        //logger.info("handleTextData");
630
        TextData textData = TextData.NewInstance();
631
        textData.setFeature(currentFeature);
632
        textData.putText(Language.UNKNOWN_LANGUAGE(), descriptionsFulltext.get(k));
633
        sourceHandler.addSource(refMods, textData);
634
        td.addElement(textData);
635
    }
636

    
637
    /**
638
     * @param acceptedTaxon
639
     * @param refMods
640
     * @param td
641
     * @param soo
642
     */
643
    private void handleAssociation(Taxon acceptedTaxon, Reference refMods, TaxonDescription td, MySpecimenOrObservation soo) {
644
        logger.info("handleAssociation");
645
        String descr=soo.getDescr();
646
        DerivedUnit derivedUnitBase = soo.getDerivedUnitBase();
647

    
648
        sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
649

    
650
        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
651

    
652
        Feature feature=null;
653
        feature = makeFeature(derivedUnitBase);
654
        if(!StringUtils.isEmpty(descr)) {
655
            derivedUnitBase.setTitleCache(descr, true);
656
        }
657

    
658
        IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
659

    
660
        taxonDescription.addElement(indAssociation);
661
        sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
662
        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
663
        td.setDescribedSpecimenOrObservation(soo.getDerivedUnitBase());
664
    }
665

    
666
    /**
667
     * create an individualAssociation
668
     * @param refMods
669
     * @param derivedUnitBase
670
     * @param feature
671
     * @return
672
     */
673
    private IndividualsAssociation createIndividualAssociation(Reference refMods, DerivedUnit derivedUnitBase,
674
            Feature feature) {
675
        logger.info("createIndividualAssociation");
676
        IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
677
        indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
678
        indAssociation.setFeature(feature);
679
        indAssociation = sourceHandler.addSource(refMods, indAssociation);
680
        return indAssociation;
681
    }
682

    
683
    /**
684
     * @param specimenOrObservations
685
     * @param descriptionsFulltext
686
     * @param i
687
     * @param specimenOrObservation
688
     */
689
    private void extractTextFromSpecimenOrObservation(Map<Integer, List<MySpecimenOrObservation>> specimenOrObservations,
690
            Map<Integer, String> descriptionsFulltext, int i, MySpecimenOrObservation specimenOrObservation) {
691
        logger.info("extractTextFromSpecimenOrObservation");
692
        List<MySpecimenOrObservation> speObsList = specimenOrObservations.get(i);
693
        if (speObsList == null) {
694
            speObsList=new ArrayList<MySpecimenOrObservation>();
695
        }
696
        speObsList.add(specimenOrObservation);
697
        specimenOrObservations.put(i,speObsList);
698

    
699
        String s = specimenOrObservation.getDerivedUnitBase().toString();
700
        if (descriptionsFulltext.get(i) !=null){
701
            s = descriptionsFulltext.get(i)+" "+s;
702
        }
703
        descriptionsFulltext.put(i, s);
704
    }
705

    
706
    /**
707
     * Extract the text with the inline link to a taxon
708
     * @param nametosave
709
     * @param refMods
710
     * @param descriptionsFulltext
711
     * @param i
712
     * @param paragraph
713
     */
714
    @SuppressWarnings("rawtypes")
715
    private void extractInLine(List<TaxonName> nametosave, Reference refMods, Map<Integer, String> descriptionsFulltext,
716
            int i, Node paragraph) {
717
        //logger.info("extractInLine");
718
        String inLine=getInlineTextForName(nametosave, refMods, paragraph);
719
        if (descriptionsFulltext.get(i) !=null){
720
            inLine = descriptionsFulltext.get(i)+inLine;
721
        }
722
        descriptionsFulltext.put(i, inLine);
723
    }
724

    
725
    /**
726
     * Extract the raw text from a Node
727
     * @param descriptionsFulltext
728
     * @param node
729
     * @param j
730
     */
731
    private void extractText(Map<Integer, String> descriptionsFulltext, int i, Node node) {
732
        //logger.info("extractText");
733
        if(!node.getTextContent().trim().isEmpty()) {
734
            String s =node.getTextContent().trim();
735
            if (descriptionsFulltext.get(i) !=null){
736
                s = descriptionsFulltext.get(i)+" "+s;
737
            }
738
            descriptionsFulltext.put(i, s);
739
        }
740
    }
741

    
742

    
743
    /**
744
     * @param materials: the XML node group
745
     * @param acceptedTaxon: the current accepted Taxon
746
     * @param refMods: the current reference extracted from the MODS
747
     */
748
    @SuppressWarnings("rawtypes")
749
    private void extractMaterials(Node materials, Taxon acceptedTaxon, Reference refMods,List<TaxonName> nametosave) {
750
        logger.info("EXTRACTMATERIALS");
751
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
752
        NodeList children = materials.getChildNodes();
753
        NodeList events = null;
754
        //        String descr="";
755

    
756

    
757
        for (int i=0;i<children.getLength();i++){
758
            String rawAssociation="";
759
            boolean added=false;
760
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
761
                events = children.item(i).getChildNodes();
762
                for(int k=0;k<events.getLength();k++){
763
                    if (events.item(k).getNodeName().equalsIgnoreCase("tax:name")){
764
                        String inLine= getInlineTextForName(nametosave, refMods, events.item(k));
765
                        if(!inLine.isEmpty()) {
766
                            rawAssociation+=inLine;
767
                        }
768
                    }
769
                    if (! events.item(k).getNodeName().equalsIgnoreCase("tax:name")
770
                            && !events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
771
                        rawAssociation+= events.item(k).getTextContent().trim();
772
                    }
773
                    if(events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
774
                        if (!containsDistinctLetters(rawAssociation.replaceAll(";",""))) {
775
                            rawAssociation="no description text";
776
                        }
777
                        added=true;
778
                        handleDerivedUnitFacadeAndBase(acceptedTaxon, refMods, events.item(k), rawAssociation);
779
                    }
780
                    if (!rawAssociation.isEmpty() && !added){
781

    
782
                        Feature feature = Feature.MATERIALS_EXAMINED();
783
                        featuresMap.put(feature.getTitleCache(),feature);
784

    
785
                        TextData textData = createTextData(rawAssociation, refMods, feature);
786

    
787
                        if(! rawAssociation.isEmpty() && (acceptedTaxon!=null)){
788
                            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
789
                            td.addElement(textData);
790
                            acceptedTaxon.addDescription(td);
791
                            sourceHandler.addAndSaveSource(refMods, td, null);
792
                        }
793
                        //                        DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
794
                        //                        derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
795
                        //
796
                        //                        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
797
                        //                        acceptedTaxon.addDescription(taxonDescription);
798
                        //
799
                        //                        IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
800
                        //
801
                        //                        Feature feature = Feature.MATERIALS_EXAMINED();
802
                        //                        featuresMap.put(feature.getTitleCache(),feature);
803
                        //                        if(!StringUtils.isEmpty(rawAssociation)) {
804
                        //                            derivedUnitBase.setTitleCache(rawAssociation, true);
805
                        //                        }
806
                        //                        indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
807
                        //                        indAssociation.setFeature(feature);
808
                        //                        indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
809
                        //
810
                        //                        /*boolean sourceExists=false;
811
                        //                        Set<DescriptionElementSource> dsources = indAssociation.getSources();
812
                        //                        for (DescriptionElementSource src : dsources){
813
                        //                            String micro = src.getCitationMicroReference();
814
                        //                            Reference r = src.getCitation();
815
                        //                            if (r.equals(refMods) && micro == null) {
816
                        //                                sourceExists=true;
817
                        //                            }
818
                        //                        }
819
                        //                        if(!sourceExists) {
820
                        //                            indAssociation.addSource(null, null, refMods, null);
821
                        //                        }*/
822
                        //                        taxonDescription.addElement(indAssociation);
823
                        //                        taxonDescription.setTaxon(acceptedTaxon);
824
                        //                        taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
825
                        //
826
                        //                        /*sourceExists=false;
827
                        //                        Set<IdentifiableSource> sources = taxonDescription.getSources();
828
                        //                        for (IdentifiableSource src : sources){
829
                        //                            String micro = src.getCitationMicroReference();
830
                        //                            Reference r = src.getCitation();
831
                        //                            if (r.equals(refMods) && micro == null) {
832
                        //                                sourceExists=true;
833
                        //                            }
834
                        //                        }
835
                        //                        if(!sourceExists) {
836
                        //                            taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
837
                        //                        }*/
838
                        //
839
                        //                        importer.getDescriptionService().saveOrUpdate(taxonDescription);
840
                        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
841

    
842
                        rawAssociation="";
843
                    }
844
                }
845
            }
846
        }
847
    }
848

    
849
    /**
850
     * @param acceptedTaxon
851
     * @param refMods
852
     * @param events
853
     * @param rawAssociation
854
     * @param k
855
     */
856
    private void handleDerivedUnitFacadeAndBase(Taxon acceptedTaxon, Reference refMods, Node event,
857
            String rawAssociation) {
858
        logger.info("handleDerivedUnitFacadeAndBase");
859
        String descr;
860
        DerivedUnit derivedUnitBase;
861
        MySpecimenOrObservation myspecimenOrObservation;
862
        DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
863
        derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
864

    
865
        sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
866

    
867
        //TODO this may not always be correct, ask user
868
        TaxonName typifiableName = acceptedTaxon != null ?  acceptedTaxon.getName() : null;
869
        myspecimenOrObservation = extractSpecimenOrObservation(event,derivedUnitBase,SpecimenOrObservationType.DerivedUnit, typifiableName);
870
        derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
871
        descr=myspecimenOrObservation.getDescr();
872

    
873
        sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
874

    
875
        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
876

    
877
        Feature feature = makeFeature(derivedUnitBase);
878
        featuresMap.put(feature.getTitleCache(),feature);
879
        if(!StringUtils.isEmpty(descr)) {
880
            derivedUnitBase.setTitleCache(descr, true);
881
        }
882

    
883
        IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
884

    
885
        taxonDescription.addElement(indAssociation);
886
        sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
887
        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
888
    }
889

    
890

    
891

    
892
    /**
893
     * @param currentName
894
     * @param materials: the XML node group
895
     * @param acceptedTaxon: the current accepted Taxon
896
     * @param refMods: the current reference extracted from the MODS
897
     */
898
    private String extractMaterialsDirect(Node materials, Taxon acceptedTaxon, Reference refMods, String event, TaxonName currentName) {
899
        logger.info("extractMaterialsDirect");
900
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
901
        String descr="";
902

    
903
        DerivedUnit derivedUnitBase=null;
904
        MySpecimenOrObservation myspecimenOrObservation = extractSpecimenOrObservation(materials,derivedUnitBase, SpecimenOrObservationType.DerivedUnit, currentName);
905
        derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
906

    
907
        sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
908

    
909
        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
910

    
911
        Feature feature=null;
912
        if (event.equalsIgnoreCase("collection")){
913
            feature = makeFeature(derivedUnitBase);
914
        }
915
        else{
916
            feature = Feature.MATERIALS_EXAMINED();
917
        }
918
        featuresMap.put(feature.getTitleCache(),  feature);
919

    
920
        descr=myspecimenOrObservation.getDescr();
921
        if(!StringUtils.isEmpty(descr)) {
922
            derivedUnitBase.setTitleCache(descr, true);
923
        }
924

    
925
        IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
926

    
927
        taxonDescription.addElement(indAssociation);
928
        sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
929
        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
930

    
931
        return derivedUnitBase.getTitleCache();
932

    
933
    }
934

    
935

    
936
    /**
937
     * @param description: the XML node group
938
     * @param acceptedTaxon: the current acceptedTaxon
939
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
940
     * @param nametosave: the list of objects to save into the CDM
941
     * @param refMods: the current reference extracted from the MODS
942
     * @param featureName: the feature name
943
     */
944
    @SuppressWarnings({ "rawtypes"})
945
    private String extractSpecificFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
946
            List<TaxonName> nametosave, Reference refMods, String featureName ) {
947
        logger.info("extractSpecificFeature "+featureName);
948
        //        System.out.println("GRUUUUuu");
949
        NodeList children = description.getChildNodes();
950
        NodeList insideNodes ;
951
        NodeList trNodes;
952
        //        String descr ="";
953
        String localdescr="";
954
        List<String> blabla=null;
955
        List<String> text = new ArrayList<String>();
956

    
957
        String table="<table>";
958
        String head="";
959
        String line="";
960

    
961
        Feature currentFeature=getFeatureObjectFromString(featureName);
962

    
963
        //        String fullContent = description.getTextContent();
964
        for (int i=0;i<children.getLength();i++){
965
            //            localdescr="";
966
            if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
967
                text.add(children.item(i).getTextContent().trim());
968
            }
969
            if (featureName.equalsIgnoreCase("table")){
970
                if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
971
                        children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("thead")){
972
                    head = extractTableHead(children.item(i));
973
                    table+=head;
974
                    line = extractTableLine(children.item(i));
975
                    if (!line.equalsIgnoreCase("<tr></tr>")) {
976
                        table+=line;
977
                    }
978
                }
979
                if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
980
                        children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
981
                    line = extractTableLineWithColumn(children.item(i).getChildNodes());
982
                    if(!line.equalsIgnoreCase("<tr></tr>")) {
983
                        table+=line;
984
                    }
985
                }
986
            }
987
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
988
                insideNodes=children.item(i).getChildNodes();
989
                blabla= new ArrayList<String>();
990
                for (int j=0;j<insideNodes.getLength();j++){
991
                    Node insideNode = insideNodes.item(j);
992
                	if (insideNode.getNodeName().equalsIgnoreCase("tax:name")){
993
                        String inlinetext = getInlineTextForName(nametosave, refMods, insideNode);
994
                        if (!inlinetext.isEmpty()) {
995
                            blabla.add(inlinetext);
996
                        }
997
                    }
998
                    else if (insideNode.getNodeName().equalsIgnoreCase("#text")) {
999
                        if(!insideNode.getTextContent().trim().isEmpty()){
1000
                            blabla.add(insideNode.getTextContent().trim());
1001
                            //                            localdescr += insideNodes.item(j).getTextContent().trim();
1002
                        }
1003
                    }
1004
                }
1005
                if (!blabla.isEmpty()) {
1006
                    String blaStr = StringUtils.join(blabla," ").trim();
1007
                    if(!stringIsEmpty(blaStr)) {
1008
                        setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1009
                        text.add(blaStr);
1010
                    }
1011
                }
1012

    
1013
            }
1014
            if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1015
                if(!children.item(i).getTextContent().trim().isEmpty()){
1016
                    localdescr = children.item(i).getTextContent().trim();
1017
                    if(!stringIsEmpty(localdescr)) {
1018
                        setParticularDescription(localdescr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1019
                    }
1020
                }
1021
            }
1022
        }
1023

    
1024
        table+="</table>";
1025
        if (!table.equalsIgnoreCase("<table></table>")){
1026
            //            System.out.println("TABLE : "+table);
1027
            text.add(table);
1028
        }
1029

    
1030
        if (text !=null && !text.isEmpty()) {
1031
            return StringUtils.join(text," ");
1032
        } else {
1033
            return "";
1034
        }
1035

    
1036
    }
1037

    
1038
    /**
1039
     * @param children
1040
     * @param i
1041
     * @return
1042
     */
1043
    private String extractTableLine(Node child) {
1044
        //logger.info("extractTableLine");
1045
        String line;
1046
        line="<tr>";
1047
        if (child.getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1048
            line = extractTableLineWithColumn(child.getChildNodes());
1049
        }
1050
        line+="</tr>";
1051
        return line;
1052
    }
1053

    
1054
    /**
1055
     * @param children
1056
     * @param i
1057
     * @return
1058
     */
1059
    private String extractTableHead(Node child) {
1060
        //logger.info("extractTableHead");
1061
        String head;
1062
        String line;
1063
        head="<th>";
1064
        NodeList trNodes = child.getChildNodes();
1065
        for (int k=0;k<trNodes.getLength();k++){
1066
            if (trNodes.item(k).getNodeName().equalsIgnoreCase("tax:div")
1067
                    && trNodes.item(k).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1068
                line = extractTableLineWithColumn(trNodes.item(k).getChildNodes());
1069
                head+=line;
1070
            }
1071
        }
1072
        head+="</th>";
1073
        return head;
1074
    }
1075

    
1076
    /**
1077
     * build a html table line, with td columns
1078
     * @param tdNodes
1079
     * @return an html coded line
1080
     */
1081
    private String extractTableLineWithColumn(NodeList tdNodes) {
1082
        //logger.info("extractTableLineWithColumn");
1083
        String line;
1084
        line="<tr>";
1085
        for (int l=0;l<tdNodes.getLength();l++){
1086
            if (tdNodes.item(l).getNodeName().equalsIgnoreCase("tax:p")){
1087
                line+="<td>"+tdNodes.item(l).getTextContent()+"</td>";
1088
            }
1089
        }
1090
        line+="</tr>";
1091
        return line;
1092
    }
1093

    
1094
    /**
1095
     * @param description: the XML node group
1096
     * @param acceptedTaxon: the current acceptedTaxon
1097
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1098
     * @param nametosave: the list of objects to save into the CDM
1099
     * @param refMods: the current reference extracted from the MODS
1100
     * @param featureName: the feature name
1101
     */
1102
    @SuppressWarnings({ "unused", "rawtypes" })
1103
    private String extractSpecificFeatureNotStructured(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
1104
            List<TaxonName> nameToSave, Reference refMods, String featureName ) {
1105
        logger.info("extractSpecificFeatureNotStructured " + featureName);
1106
        NodeList children = description.getChildNodes();
1107
        NodeList insideNodes ;
1108
        List<String> blabla= new ArrayList<String>();
1109

    
1110

    
1111
        Feature currentFeature = getFeatureObjectFromString(featureName);
1112

    
1113
        String fullContent = description.getTextContent();
1114
        for (int i=0;i<children.getLength();i++){
1115
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1116
                insideNodes=children.item(i).getChildNodes();
1117
                for (int j=0;j<insideNodes.getLength();j++){
1118
                    if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1119
                        String inlineText =getInlineTextForName(nameToSave, refMods, insideNodes.item(j));
1120
                        if(!inlineText.isEmpty()) {
1121
                            blabla.add(inlineText);
1122
                        }
1123
                    }
1124
                    if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1125
                        if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1126
                            blabla.add(insideNodes.item(j).getTextContent().trim());
1127
                        }
1128
                    }
1129
                }
1130
            }
1131
            if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1132
                if(!children.item(i).getTextContent().trim().isEmpty()){
1133
                    String localdescr = children.item(i).getTextContent().trim();
1134
                    if(!localdescr.isEmpty())
1135
                    {
1136
                        blabla.add(localdescr);
1137
                    }
1138
                }
1139
            }
1140
        }
1141

    
1142
        if (blabla !=null && !blabla.isEmpty()) {
1143
            String blaStr = StringUtils.join(blabla," ").trim();
1144
            if (! stringIsEmpty(blaStr)) {
1145
                setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1146
                return blaStr;
1147
            } else {
1148
                return "";
1149
            }
1150
        } else {
1151
            return "";
1152
        }
1153

    
1154
    }
1155

    
1156
    /**
1157
     * @param blaStr
1158
     * @return
1159
     */
1160
    private boolean stringIsEmpty(String blaStr) {
1161
        if (blaStr.matches("(\\.|,|;|\\.-)?")){
1162
        	return true;
1163
        }else{
1164
        	return false;
1165
        }
1166
    }
1167

    
1168
    /**
1169
     * @param nametosave
1170
     * @param refMods
1171
     * @param insideNodes
1172
     * @param blabla
1173
     * @param j
1174
     */
1175
    @SuppressWarnings({ "rawtypes" })
1176
    private String getInlineTextForName(List<TaxonName> nametosave, Reference refMods, Node insideNode) {
1177
        if (true){
1178
        	NodeList children = insideNode.getChildNodes();
1179
        	String result = "";
1180
            for (int i=0;i<children.getLength();i++){
1181
            	Node nameChild = children.item(i);
1182
                if(nameChild.getNodeName().equalsIgnoreCase("#text")){
1183
                	result += nameChild.getTextContent();
1184
                }else{
1185
                	//do nothing
1186
                }
1187
            }
1188
        	return result.replace("\n", "").trim();
1189
        }else{
1190
	    	TaxonName tnb = getTaxonNameFromXML(insideNode, nametosave,refMods,false);
1191
	        //                        Taxon tax = getTaxonFromTxonNameBase(tnb, refMods);
1192
	        Taxon tax = currentMyName.getTaxon();
1193
	        if(tnb !=null && tax != null){
1194
	            String linkedTaxon = tnb.getTitleCache().split("sec")[0];//TODO NOT IMPLEMENTED IN THE CDM YET
1195
	            return "<cdm:taxon uuid='"+tax.getUuid()+"'>"+linkedTaxon+"</cdm:taxon>";
1196
	        }else if (tnb != null && tax == null){
1197
	        	//TODO
1198
	        	return "<cdm:taxonName uuid='" + tnb.getUuid() +"'>" + tnb.getTitleCache().split("sec")[0]  +"</cdm:taxonName>";
1199
	        }else{
1200
	        	logger.warn("Inline text has no content yet");
1201
	        }
1202
	        return "";
1203
        }
1204
    }
1205

    
1206
    /**
1207
     * @param featureName
1208
     * @return
1209
     */
1210
    @SuppressWarnings("rawtypes")
1211
    private Feature getFeatureObjectFromString(String featureName) {
1212
        logger.info("getFeatureObjectFromString");
1213
        List<Feature> features = importer.getTermService().list(Feature.class, null,null,null,null);
1214
        Feature currentFeature=null;
1215
        for (Feature feature: features){
1216
            String tmpF = feature.getTitleCache();
1217
            if (tmpF.equalsIgnoreCase(featureName)) {
1218
                currentFeature=feature;
1219
                //                System.out.println("currentFeatureFromList "+currentFeature.getUuid());
1220
            }
1221
        }
1222
        if (currentFeature == null) {
1223
            currentFeature=Feature.NewInstance(featureName, featureName, featureName);
1224
            if(featureName.equalsIgnoreCase("Other")){
1225
                currentFeature.setUuid(OtherUUID);
1226
            }
1227
            if(featureName.equalsIgnoreCase(notMarkedUp)){
1228
                currentFeature.setUuid(NotMarkedUpUUID);
1229
            }
1230
            importer.getTermService().saveOrUpdate(currentFeature);
1231
        }
1232
        return currentFeature;
1233
    }
1234

    
1235

    
1236

    
1237

    
1238
    /**
1239
     * @param children: the XML node group
1240
     * @param nametosave: the list of objects to save into the CDM
1241
     * @param acceptedTaxon: the current acceptedTaxon
1242
     * @param refMods: the current reference extracted from the MODS
1243
     * @param fullContent :the parsed XML content
1244
     * @return a list of description (text)
1245
     */
1246
    @SuppressWarnings({ "unused", "rawtypes" })
1247
    private List<String> parseParagraph(List<TaxonName> namesToSave, Taxon acceptedTaxon, Reference refMods, Node paragraph, Feature feature){
1248
        logger.info("parseParagraph "+feature.toString());
1249
        List<String> fullDescription=  new ArrayList<String>();
1250
        //        String localdescr;
1251
        String descr="";
1252
        NodeList insideNodes ;
1253
        boolean collectionEvent = false;
1254
        List<Node>collectionEvents = new ArrayList<Node>();
1255

    
1256
        NodeList children = paragraph.getChildNodes();
1257

    
1258
        for (int i=0;i<children.getLength();i++){
1259
            //            localdescr="";
1260
            if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
1261
                descr += children.item(i).getTextContent().trim();
1262
            }
1263
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1264
                insideNodes=children.item(i).getChildNodes();
1265
                List<String> blabla= new ArrayList<String>();
1266
                for (int j=0;j<insideNodes.getLength();j++){
1267
                    boolean nodeKnown = false;
1268
                    //    System.out.println("insideNodes.item(j).getNodeName() : "+insideNodes.item(j).getNodeName());
1269
                    if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1270
                        String inlineText = getInlineTextForName(namesToSave, refMods, insideNodes.item(j));
1271
                        if (!inlineText.isEmpty()) {
1272
                            blabla.add(inlineText);
1273
                        }
1274
                        nodeKnown=true;
1275
                    }
1276
                    else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1277
                        if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1278
                            blabla.add(insideNodes.item(j).getTextContent().trim());
1279
                            // localdescr += insideNodes.item(j).getTextContent().trim();
1280
                        }
1281
                        nodeKnown=true;
1282
                    }
1283
                    else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:bibref")) {
1284
                        String ref = insideNodes.item(j).getTextContent().trim();
1285
                        if (ref.endsWith(";")  && ((ref.length())>1)) {
1286
                            ref=ref.substring(0, ref.length()-1)+".";
1287
                        }
1288
                        Reference reference = ReferenceFactory.newGeneric();
1289
                        reference.setTitleCache(ref, true);
1290
                        blabla.add(reference.getTitleCache());
1291
                        nodeKnown=true;
1292
                    }
1293
                    else if  (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:figure")){
1294
                        String figure = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "figure");
1295
                        blabla.add(figure);
1296
                    }
1297
                    else if(insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:div") &&
1298
                            insideNodes.item(j).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1299
                            insideNodes.item(j).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1300
                        String table = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1301
                        blabla.add(table);
1302
                    }
1303
                    else if  (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1304
                        //                        logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1305
                        String titlecache  = extractMaterialsDirect(insideNodes.item(j), acceptedTaxon, refMods, "collection", null);
1306
                        blabla.add(titlecache);
1307
                        collectionEvent=true;
1308
                        collectionEvents.add(insideNodes.item(j));
1309
                        nodeKnown=true;
1310
                    }else{
1311
                    	logger.warn("node not handled yet: " + insideNodes.item(j).getNodeName());
1312
                    }
1313

    
1314
                }
1315
                if (!StringUtils.isBlank(StringUtils.join(blabla," "))) {
1316
                    fullDescription.add(StringUtils.join(blabla," "));
1317
                }
1318
            }
1319
            if  (children.item(i).getNodeName().equalsIgnoreCase("tax:figure")){
1320
                String figure = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "Figure");
1321
                fullDescription.add(figure);
1322
            }
1323
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1324
                    children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1325
                    children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1326
                String table = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1327
                fullDescription.add(table);
1328
            }
1329
        }
1330

    
1331
        if( !stringIsEmpty(descr.trim())){
1332
            Feature currentFeature= getNotMarkedUpFeatureObject();
1333
            setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1334
        }
1335
        //        if (collectionEvent) {
1336
        //            logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1337
        //            for (Node coll:collectionEvents){
1338
        //                = extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
1339
        //            }
1340
        //        }
1341
        return fullDescription;
1342
    }
1343

    
1344

    
1345
    /**
1346
     * @param description: the XML node group
1347
     * @param acceptedTaxon: the current acceptedTaxon
1348
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1349
     * @param nametosave: the list of objects to save into the CDM
1350
     * @param refMods: the current reference extracted from the MODS
1351
     * @param feature: the feature to link the data with
1352
     */
1353
    @SuppressWarnings("rawtypes")
1354
    private void extractFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonName> namesToSave, Reference refMods, Feature feature){
1355
        logger.info("EXTRACT FEATURE "+feature.toString());
1356
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1357
        List<String> fullDescription= parseParagraph( namesToSave, acceptedTaxon, refMods, description,feature);
1358

    
1359
        //        System.out.println("Feature : "+feature.toString()+", "+fullDescription.toString());
1360
        if (!fullDescription.isEmpty() &&!stringIsEmpty(StringUtils.join(fullDescription,"\n").trim())) {
1361
            setParticularDescription(StringUtils.join(fullDescription,"\n").trim(),acceptedTaxon,defaultTaxon, refMods,feature);
1362
        }
1363

    
1364
    }
1365

    
1366

    
1367
    /**
1368
     * @param descr: the XML Nodegroup to parse
1369
     * @param acceptedTaxon: the current acceptedTaxon
1370
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1371
     * @param refMods: the current reference extracted from the MODS
1372
     * @param currentFeature: the feature name
1373
     * @return
1374
     */
1375
    private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon, Reference refMods, Feature currentFeature) {
1376
        logger.info("setParticularDescription " + currentFeature.getTitleCache()+", \n blabla : "+descr);
1377

    
1378
        //remove redundant feature title
1379
        String featureStr = currentFeature.getTitleCache();
1380
        if (!descr.isEmpty() && descr.toLowerCase().startsWith(featureStr.toLowerCase())){
1381
        	descr = descr.replaceAll("(?i)" + featureStr + "\\.\\s*", "");
1382
        }
1383

    
1384

    
1385
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1386
        featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1387

    
1388
        TextData textData = createTextData(descr, refMods, currentFeature);
1389

    
1390
        if(acceptedTaxon!=null){
1391
            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1392
            td.addElement(textData);
1393
            acceptedTaxon.addDescription(td);
1394

    
1395
            sourceHandler.addAndSaveSource(refMods, td, null);
1396
            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1397
        }
1398

    
1399
        if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1400
            try{
1401
                Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1402
                if (tmp!=null) {
1403
                    defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1404
                }else{
1405
                    importer.getTaxonService().saveOrUpdate(defaultTaxon);
1406
                }
1407
            }catch(Exception e){
1408
                logger.debug("TAXON EXISTS"+defaultTaxon);
1409
            }
1410

    
1411
            TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1412
            defaultTaxon.addDescription(td);
1413
            td.addElement(textData);
1414
            sourceHandler.addAndSaveSource(refMods, td, null);
1415
            importer.getTaxonService().saveOrUpdate(defaultTaxon);
1416
        }
1417
    }
1418

    
1419
    /**
1420
     * @param descr
1421
     * @param refMods
1422
     * @param currentFeature
1423
     * @return
1424
     */
1425
    private TextData createTextData(String descr, Reference refMods, Feature currentFeature) {
1426
        //logger.info("createTextData");
1427
        TextData textData = TextData.NewInstance();
1428
        textData.setFeature(currentFeature);
1429
        sourceHandler.addSource(refMods, textData);
1430

    
1431
        textData.putText(Language.UNKNOWN_LANGUAGE(), descr);
1432
        return textData;
1433
    }
1434

    
1435

    
1436

    
1437
    /**
1438
     * @param descr: the XML Nodegroup to parse
1439
     * @param acceptedTaxon: the current acceptedTaxon
1440
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1441
     * @param refMods: the current reference extracted from the MODS
1442
     * @param currentFeature: the feature name
1443
     * @return
1444
     */
1445
    private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon,Reference currentRef, Reference refMods, Feature currentFeature) {
1446
        //        System.out.println("setParticularDescriptionSPecial "+currentFeature);
1447
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
1448
        logger.info("setParticularDescription");
1449
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1450

    
1451
        featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1452
        TextData textData = createTextData(descr, refMods, currentFeature);
1453

    
1454
        if(! descr.isEmpty() && (acceptedTaxon!=null)){
1455
            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1456
            td.addElement(textData);
1457
            acceptedTaxon.addDescription(td);
1458

    
1459
            sourceHandler.addAndSaveSource(refMods, td, currentRef);
1460
            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1461
        }
1462

    
1463
        if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1464
            try{
1465
                Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1466
                if (tmp!=null) {
1467
                    defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1468
                }else{
1469
                    importer.getTaxonService().saveOrUpdate(defaultTaxon);
1470
                }
1471
            }catch(Exception e){
1472
                logger.debug("TAXON EXISTS"+defaultTaxon);
1473
            }
1474

    
1475
            TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1476
            defaultTaxon.addDescription(td);
1477
            td.addElement(textData);
1478
            sourceHandler.addAndSaveSource(currentRef, td,currentRef);
1479
            importer.getTaxonService().saveOrUpdate(defaultTaxon);
1480
        }
1481
    }
1482

    
1483

    
1484

    
1485
    /**
1486
     * @param synonyms: the XML Nodegroup to parse
1487
     * @param nametosave: the list of objects to save into the CDM
1488
     * @param acceptedTaxon: the current acceptedTaxon
1489
     * @param refMods: the current reference extracted from the MODS
1490
     */
1491
    @SuppressWarnings({ "rawtypes" })
1492
    private void extractSynonyms(Node synonymsNode, Taxon acceptedTaxon,Reference refMods, String followingText) {
1493
        logger.info("extractSynonyms");
1494
        //System.out.println("extractSynonyms for: "+acceptedTaxon);
1495
        Taxon ttmp = (Taxon) importer.getTaxonService().find(acceptedTaxon.getUuid());
1496
        if (ttmp != null) {
1497
            acceptedTaxon = CdmBase.deproxy(ttmp,Taxon.class);
1498
        }
1499
        else{
1500
            acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1501
        }
1502
        NodeList children = synonymsNode.getChildNodes();
1503
        List<MyName> names = new ArrayList<MyName>();
1504

    
1505
        if(synonymsNode.getNodeName().equalsIgnoreCase("tax:name")){
1506
            try {
1507
            	MyName myName = extractScientificNameSynonym(synonymsNode, refMods, followingText);
1508
                names.add(myName);
1509
            } catch (TransformerFactoryConfigurationError e) {
1510
                logger.warn(e);
1511
            } catch (TransformerException e) {
1512
                logger.warn(e);
1513
            }
1514
        }
1515

    
1516

    
1517
        for (int i=0;i<children.getLength();i++){
1518
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1519
                NodeList tmp = children.item(i).getChildNodes();
1520
                //                String fullContent = children.item(i).getTextContent();
1521
                for (int j=0; j< tmp.getLength();j++){
1522
                    if(tmp.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1523
                        try {
1524
                        	MyName myName = extractScientificNameSynonym(tmp.item(j),refMods, followingText);
1525
                            names.add(myName);
1526
                        } catch (TransformerFactoryConfigurationError e) {
1527
                            logger.warn(e);
1528
                        } catch (TransformerException e) {
1529
                            logger.warn(e);
1530
                        }
1531
                    }
1532
                }
1533
            }
1534
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
1535
                try {
1536
                	MyName myName = extractScientificNameSynonym(children.item(i),refMods, followingText);
1537
                    names.add(myName);
1538
                } catch (TransformerFactoryConfigurationError e) {
1539
                    logger.warn(e);
1540
                } catch (TransformerException e) {
1541
                    logger.warn(e);
1542
                }
1543

    
1544
            }
1545
        }
1546

    
1547
        for(MyName name:names){
1548
        	TaxonName nameToBeFilled = name.getTaxonName();
1549
            Synonym synonym = name.getSyno();
1550
            addFollowingTextToName(nameToBeFilled, followingText);
1551

    
1552
            /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1553
            nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank());
1554
            if (nameToBeFilled.hasProblem() &&
1555
                    !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1556
                //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1557
                addProblemNameToFile(name.getName(),"",nomenclaturalCode,name.getRank());
1558
                nameToBeFilled = solveNameProblem(name.getOriginalName(), name.getName(), parser,name.getAuthor(), name.getRank());
1559
            }
1560
            nameToBeFilled = getTaxonName(nameToBeFilled,nametosave,statusType);
1561
             */
1562
            if (!name.getIdentifier().isEmpty() && (name.getIdentifier().length()>2)){
1563
                setLSID(name.getIdentifier(), synonym);
1564
            }
1565

    
1566
            Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1567
            boolean synoExist = false;
1568
            for (Synonym syn: synonymsSet){
1569

    
1570
                boolean a =syn.getName().equals(synonym.getName());
1571
                boolean b = syn.getSec().equals(synonym.getSec());
1572
                if (a && b) {
1573
                    synoExist=true;
1574
                }
1575
            }
1576
            if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1577
                sourceHandler.addSource(refMods, synonym);
1578
                acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1579
            }
1580
        }
1581
        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1582
    }
1583

    
1584

    
1585
    private boolean addFollowingTextToName(TaxonName nameToBeFilled, String followingText) {
1586
    	if (nameToBeFilled != null && StringUtils.isNotBlank(followingText)){
1587
    		if (! followingText.matches("\\d\\.?")){
1588

    
1589
	    		if (followingText.startsWith(",")){
1590
	    			followingText = followingText.substring(1).trim();
1591
	    		}
1592
	    		nameToBeFilled.setFullTitleCache(nameToBeFilled.getFullTitleCache()+ "," +followingText , true);
1593
    		}
1594
    		return true;
1595
    	}
1596
    	return false;
1597

    
1598
	}
1599

    
1600
	/**
1601
     * @param refgroup: the XML nodes
1602
     * @param nametosave: the list of objects to save into the CDM
1603
     * @param acceptedTaxon: the current acceptedTaxon
1604
     * @param nametosave: the list of objects to save into the CDM
1605
     * @param refMods: the current reference extracted from the MODS
1606
     * @return the acceptedTaxon (why?)
1607
     * handle cases where the bibref are inside <p> and outside
1608
     */
1609
    @SuppressWarnings({ "rawtypes" })
1610
    private Taxon extractReferences(Node refgroup, List<TaxonName> nametosave, Taxon acceptedTaxon, Reference refMods) {
1611
        logger.info("extractReferences");
1612
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1613

    
1614
        NodeList children = refgroup.getChildNodes();
1615
        INonViralName nameToBeFilled = getNonViralNameAccNomenclature();
1616

    
1617
        ReferenceBuilder refBuild = new ReferenceBuilder(sourceHandler);
1618
        for (int i=0;i<children.getLength();i++){
1619
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
1620
                String ref = children.item(i).getTextContent().trim();
1621
                refBuild.builReference(ref, treatmentMainName, nomenclaturalCode,  acceptedTaxon, refMods);
1622
                if (!refBuild.isFoundBibref()){
1623
                    extractReferenceRawText(children.item(i).getChildNodes(), nameToBeFilled, refMods, acceptedTaxon);
1624
                }
1625
            }
1626

    
1627
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1628
                NodeList references = children.item(i).getChildNodes();
1629
                String descr="";
1630
                for (int j=0;j<references.getLength();j++){
1631
                    if(references.item(j).getNodeName().equalsIgnoreCase("tax:bibref")){
1632
                        String ref = references.item(j).getTextContent().trim();
1633
                        refBuild.builReference(ref, treatmentMainName,  nomenclaturalCode,  acceptedTaxon, refMods);
1634
                    }
1635
                    else
1636
                        if (references.item(j).getNodeName().equalsIgnoreCase("#text")
1637
                                && !references.item(j).getTextContent().trim().isEmpty()){
1638
                            descr += references.item(j).getTextContent().trim();
1639
                        }
1640

    
1641
                }
1642
                if (!refBuild.isFoundBibref()){
1643
                    //if it's not tagged, put it as row information.
1644
                    //                    extractReferenceRawText(references, nameToBeFilled, nametosave, refMods, acceptedTaxon);
1645
                    //then put it as a not markup feature if not empty
1646
                    if (!stringIsEmpty(descr.trim())){
1647
                        Feature currentFeature= getNotMarkedUpFeatureObject();
1648
                        setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1649
                    }
1650
                }
1651
            }
1652
        }
1653
        //        importer.getClassificationService().saveOrUpdate(classification);
1654
        return acceptedTaxon;
1655

    
1656
    }
1657

    
1658
    /**
1659
     * get the non viral name according to the current nomenclature
1660
     * @return
1661
     */
1662

    
1663
    private INonViralName getNonViralNameAccNomenclature() {
1664
    	return nomenclaturalCode.getNewTaxonNameInstance(null);
1665
    }
1666

    
1667
    /**
1668
     * @return the feature object for the category "not marked up"
1669
     */
1670
    private Feature getNotMarkedUpFeatureObject() {
1671
    	// FIXME use getFeature(uuid ....)
1672
        logger.info("getNotMarkedUpFeatureObject");
1673
        Feature currentFeature = (Feature)importer.getTermService().find(NotMarkedUpUUID);
1674
        if (currentFeature == null) {
1675
            currentFeature=Feature.NewInstance(notMarkedUp, notMarkedUp, notMarkedUp);
1676
            currentFeature.setUuid(NotMarkedUpUUID);
1677
            //TODO use userDefined Feature Vocabulary
1678
            Feature.DISTRIBUTION().getVocabulary().addTerm(currentFeature);
1679
//            importer.getTermService().saveOrUpdate(currentFeature);
1680
            importer.getVocabularyService().saveOrUpdate(currentFeature.getVocabulary());
1681
        }
1682
        return currentFeature;
1683
    }
1684

    
1685
    /**
1686
     * @param references
1687
     * handle cases where the bibref are inside <p> and outside
1688
     */
1689
    @SuppressWarnings("rawtypes")
1690
    private void extractReferenceRawText(NodeList references, INonViralName nameToBeFilled, Reference refMods,
1691
            Taxon acceptedTaxon) {
1692
        logger.info("extractReferenceRawText");
1693
        String refString="";
1694
        currentMyName= new MyName(true);
1695
        for (int j=0;j<references.getLength();j++){
1696
            acceptedTaxon=CdmBase.deproxy(acceptedTaxon, Taxon.class);
1697
            //no bibref tag inside
1698
            //            System.out.println("references.item(j).getNodeName()"+references.item(j).getNodeName());
1699
            if (references.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1700

    
1701
                try {
1702
                	String followingText = null;  //needs to be checked if follText is possible
1703
                	//TODO create or not create?
1704
                    currentMyName = extractScientificName(references.item(j), refMods, followingText);
1705
                } catch (TransformerFactoryConfigurationError e) {
1706
                    logger.warn(e);
1707
                } catch (TransformerException e) {
1708
                    logger.warn(e);
1709
                }
1710

    
1711
                //                name=name.trim();
1712
            }
1713
            if (references.item(j).getNodeName().equalsIgnoreCase("#text")){
1714
                refString = references.item(j).getTextContent().trim();
1715
            }
1716
            if(references.item(j).getNodeName().equalsIgnoreCase("#text") && !references.item(j).getTextContent().trim().isEmpty()){
1717
                //
1718
               if (!currentMyName.getStatus().isEmpty()){
1719
            	   String nomNovStatus = this.newNameStatus(currentMyName.getStatus());
1720
	               	if (nomNovStatus != null){
1721
	               		nameToBeFilled.setAppendedPhrase(nomNovStatus);
1722
	               	}else{
1723
	            	   try {
1724
	                        NomenclaturalStatusType  statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1725
                            nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1726
	                    } catch (UnknownCdmTypeException e) {
1727
	                        addProblematicStatusToFile(currentMyName.getStatus());
1728
	                        logger.warn("Problem with status");
1729
	                    }
1730
	               	}
1731
                }
1732

    
1733
                String fullLineRefName = references.item(j).getTextContent().trim();
1734
                int nameOrRefOrOther=2;
1735
                nameOrRefOrOther=askIfNameContained(fullLineRefName);
1736
                if (nameOrRefOrOther==0){
1737
                    TaxonName nameTBF = currentMyName.getTaxonName();
1738
                    Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1739

    
1740
                    Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1741
                    //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1742
                    boolean synoExist = false;
1743
                    for (Synonym syn: synonymsSet){
1744
                        //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1745
                        boolean a =syn.getName().equals(synonym.getName());
1746
                        boolean b = syn.getSec().equals(synonym.getSec());
1747
                        if (a && b) {
1748
                            synoExist=true;
1749
                        }
1750
                    }
1751
                    if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1752
                        sourceHandler.addSource(refMods, synonym);
1753

    
1754
                        acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1755
                    }
1756
                }
1757

    
1758
                if (nameOrRefOrOther==1){
1759
                    Reference re = ReferenceFactory.newGeneric();
1760
                    re.setTitleCache(fullLineRefName, true);
1761

    
1762
                    /* TaxonName nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
1763
                    if (nameTBF.hasProblem() &&
1764
                            !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1765
                        addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
1766
                        nameTBF=solveNameProblem(currentMyName.getName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
1767
                    }
1768
                    nameTBF = getTaxonName(nameTBF,nametosave,statusType);
1769
                     */
1770
                    TaxonName nameTBF = currentMyName.getTaxonName();
1771
                    Synonym synonym = Synonym.NewInstance(nameTBF, re);
1772

    
1773
                    Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1774
                    //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1775
                    boolean synoExist = false;
1776
                    for (Synonym syn: synonymsSet){
1777
                        //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1778
                        boolean a =syn.getName().equals(synonym.getName());
1779
                        boolean b = syn.getSec().equals(synonym.getSec());
1780
                        if (a && b) {
1781
                            synoExist=true;
1782
                        }
1783
                    }
1784
                    if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1785
                        sourceHandler.addSource(refMods, synonym);
1786

    
1787
                        acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1788
                    }
1789

    
1790
                }
1791

    
1792

    
1793
                if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1794
                    setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1795
                }
1796
            }
1797

    
1798
            if(!currentMyName.getName().isEmpty()){
1799
                //logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+currentMyName.getName()+"*");
1800
                if (acceptedTaxon.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(currentMyName.getName().trim())){
1801
                    Reference refS = ReferenceFactory.newGeneric();
1802
                    refS.setTitleCache(refString, true);
1803
                    //                            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1804
                    //                            acceptedTaxon.addDescription(td);
1805
                    //                            acceptedTaxon.addSource(refSource);
1806
                    //
1807
                    //                            TextData textData = TextData.NewInstance(Feature.CITATION());
1808
                    //
1809
                    //                            textData.addSource(null, null, refS, null);
1810
                    //                            td.addElement(textData);
1811
                    //                            td.addSource(refSource);
1812
                    //                            importer.getDescriptionService().saveOrUpdate(td);
1813

    
1814

    
1815
                    if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1816
                        setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1817

    
1818
                    }
1819

    
1820
                    acceptedTaxon.getName().setNomenclaturalReference(refS);
1821
                }else{
1822
                    TaxonName nameTBF = currentMyName.getTaxonName();
1823
                    Synonym synonym = null;
1824
                    if (! currentMyName.getStatus().isEmpty()){
1825
                    	String nomNovStatus = this.newNameStatus(currentMyName.getStatus());
1826
                    	if (nomNovStatus != null){
1827
                    		nameToBeFilled.setAppendedPhrase(nomNovStatus);
1828
                    	}else{
1829
	                    	try {
1830
	                            NomenclaturalStatusType statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1831
	                            nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1832
	                            synonym = Synonym.NewInstance(nameTBF, refMods);
1833
	                        } catch (UnknownCdmTypeException e) {
1834
	                            addProblematicStatusToFile(currentMyName.getStatus());
1835
	                            logger.warn("Problem with status");
1836
	                            synonym = Synonym.NewInstance(nameTBF, refMods);
1837
	                            synonym.setAppendedPhrase(currentMyName.getStatus());
1838
	                        }
1839
                    	}
1840
                    }else{
1841
                        synonym =  Synonym.NewInstance(nameTBF, refMods);
1842
                    }
1843

    
1844

    
1845
                    if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1846
                        setLSID(currentMyName.getIdentifier(), synonym);
1847
                    }
1848

    
1849
                    Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1850
                    //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1851
                    boolean synoExist = false;
1852
                    for (Synonym syn: synonymsSet){
1853
                        //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1854
                        boolean a =syn.getName().equals(synonym.getName());
1855
                        boolean b = syn.getSec().equals(synonym.getSec());
1856
                        if (a && b) {
1857
                            synoExist=true;
1858
                        }
1859
                    }
1860
                    if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1861
                        sourceHandler.addSource(refMods, synonym);
1862

    
1863
                        acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1864
                    }
1865
                }
1866
            }
1867
            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1868
        }
1869
    }
1870

    
1871

    
1872

    
1873
    /**
1874
     * @param identifier
1875
     * @param acceptedTaxon
1876
     */
1877
    @SuppressWarnings("rawtypes")
1878
    private void setLSID(String identifier, TaxonBase<?> taxon) {
1879
        //logger.info("setLSID");
1880
        //        boolean lsidok=false;
1881
        String id = identifier.split("__")[0];
1882
        String source = identifier.split("__")[1];
1883
        if (id.indexOf("lsid")>-1){
1884
            try {
1885
                LSID lsid = new LSID(id);
1886
                taxon.setLsid(lsid);
1887
                //                lsidok=true;
1888
            } catch (MalformedLSIDException e) {
1889
                logger.warn("Malformed LSID");
1890
            }
1891

    
1892
        }
1893

    
1894
        //logger.info("search reference for LSID");
1895
        //  if ((id.indexOf("lsid")<0) || !lsidok){
1896
        //ADD ORIGINAL SOURCE ID EVEN IF LSID
1897
        Reference re = null;
1898
        Pager<Reference> references = importer.getReferenceService().findByTitleWithRestrictions(Reference.class, source, MatchMode.EXACT, null, 1, null, null, null);
1899
        if( references !=null && references.getCount()>0){
1900
            re=references.getRecords().get(0);
1901
        }
1902
        //logger.info("search reference for LSID-end");
1903
        if(re == null){
1904
            re = ReferenceFactory.newGeneric();
1905
            re.setTitleCache(source, true);
1906
            importer.getReferenceService().saveOrUpdate(re);
1907
        }
1908
        re=CdmBase.deproxy(re, Reference.class);
1909

    
1910
        //logger.info("search source for LSID");
1911
        Set<IdentifiableSource> sources = taxon.getSources();
1912
        boolean lsidinsource=false;
1913
        boolean urlinsource=false;
1914
        for (IdentifiableSource src:sources){
1915
            if (id.equalsIgnoreCase(src.getIdInSource()) && re.getTitleCache().equals(src.getCitation().getTitleCache())) {
1916
                lsidinsource=true;
1917
            }
1918
            if (src.getIdInSource() == null && re.getTitleCache().equals(sourceUrlRef.getTitleCache())) {
1919
                urlinsource=true;
1920
            }
1921
        }
1922
        if(!lsidinsource) {
1923
            taxon.addSource(OriginalSourceType.Import, id,null,re,null);
1924
        }
1925
        if(!urlinsource)
1926
        {
1927
            sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
1928
            taxon.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
1929
            // }
1930
        }
1931

    
1932
    }
1933

    
1934
    /**
1935
     * try to solve a parsing problem for a scientific name
1936
     * @param original : the name from the OCR document
1937
     * @param name : the tagged version
1938
     * @param parser
1939
     * @return the corrected TaxonName
1940
     */
1941
    /*   @SuppressWarnings({ "unchecked", "rawtypes" })
1942
    private TaxonName solveNameProblem(String original, String name, INonViralNameParser parser, String author, Rank rank) {
1943
        Map<String,String> ato = namesMap.get(original);
1944
        if (ato == null) {
1945
            ato = namesMap.get(original+" "+author);
1946
        }
1947

    
1948

    
1949
        if (ato == null && rank.equals(Rank.UNKNOWN_RANK())){
1950
            rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
1951
        }
1952
        if (ato != null && rank.equals(Rank.UNKNOWN_RANK())){
1953
            rank = getRank(ato);
1954
        }
1955
        //        TaxonName nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1956
        TaxonName nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1957
        //                logger.info("RANK: "+rank);
1958
        int retry=0;
1959
        List<ParserProblem> problems = nameTBF.getParsingProblems();
1960
        for (ParserProblem pb:problems) {
1961
            System.out.println(pb.toString());
1962
        }
1963
        while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
1964
            addProblemNameToFile(name,author,nomenclaturalCode,rank);
1965
            String fullname=name;
1966
            if(! skippQuestion) {
1967
                fullname =  getFullReference(name,nameTBF.getParsingProblems());
1968
            }
1969
            if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1970
                nameTBF = TaxonNameFactory.NewBotanicalInstance(null);
1971
            }
1972
            if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1973
                nameTBF = TaxonNameFactory.NewZoologicalInstance(null);
1974
            }
1975
            if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1976
                nameTBF= TaxonNameFactory.NewBacterialInstance(null);
1977
            }
1978
            parser.parseReferencedName(nameTBF, fullname, rank, false);
1979
            retry++;
1980
        }
1981
        if (retry == 1){
1982
            if(author != null){
1983
                if (name.indexOf(author)>-1) {
1984
                    nameTBF = parser.parseSimpleName(name.substring(0, name.indexOf(author)), nomenclaturalCode, rank);
1985
                } else {
1986
                    nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1987
                }
1988
                if (nameTBF.hasProblem()){
1989
                    if (name.indexOf(author)>-1) {
1990
                        addProblemNameToFile(name.substring(0, name.indexOf(author)),author,nomenclaturalCode,rank);
1991
                    } else {
1992
                        addProblemNameToFile(name,author,nomenclaturalCode,rank);
1993
                    }
1994
                    //                    System.out.println("TBF still has problems "+nameTBF.hasProblem());
1995
                    problems = nameTBF.getParsingProblems();
1996
                    for (ParserProblem pb:problems) {
1997
                        System.out.println(pb.toString());
1998
                    }
1999
                    nameTBF.setFullTitleCache(name, true);
2000
                }else{
2001
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2002
                        ((BotanicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2003
                    }
2004
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2005
                        ((ZoologicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2006
                    }
2007
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2008
                        ((BacterialName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2009
                    }
2010
                }
2011
                //                    logger.info("FULL TITLE CACHE "+name);
2012
            }else{
2013
                nameTBF.setFullTitleCache(name, true);
2014
            }
2015
        }
2016
        return nameTBF;
2017
    }
2018

    
2019
     */
2020

    
2021
    /**
2022
     * @param nomenclatureNode: the XML nodes
2023
     * @param nametosave: the list of objects to save into the CDM
2024
     * @param refMods: the current reference extracted from the MODS
2025
     * @return
2026
     */
2027
    @SuppressWarnings({ "rawtypes" })
2028
    private Taxon extractNomenclature(Node nomenclatureNode,  List<TaxonName> nametosave, Reference refMods) throws ClassCastException{
2029
        refMods=CdmBase.deproxy(refMods, Reference.class);
2030

    
2031
        logger.info("extractNomenclature");
2032
        NodeList children = nomenclatureNode.getChildNodes();
2033
        String freetext="";
2034
        Taxon acceptedTaxon = null;
2035
        //   INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2036

    
2037
        //        String fullContent = nomenclatureNode.getTextContent();
2038

    
2039
        NomenclaturalStatusType statusType = null;
2040
        String newNameStatus = null;
2041
        //TODO
2042
        for (int i=0;i<children.getLength();i++){
2043
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:status")){
2044
                String status = children.item(i).getTextContent().trim();
2045

    
2046
                if (!status.isEmpty()){
2047
                	if (newNameStatus(status) != null){
2048
                		newNameStatus = newNameStatus(status);
2049
                    }else{
2050
	                    try {
2051
	                        statusType = nomStatusString2NomStatus(status);
2052
	                    } catch (UnknownCdmTypeException e) {
2053
	//                    	nomNovStatus;
2054
	                    	addProblematicStatusToFile(status);
2055
	                        logger.warn("Problem with status: " + status);
2056
	                    }
2057
                    }
2058
                }
2059
            }
2060
        }
2061

    
2062
        boolean containsSynonyms=false;
2063
        boolean wasSynonym = false;
2064
        usedFollowingTextPrefix = null;  //reset
2065

    
2066
        for (int i=0; i<children.getLength(); i++){
2067
        	Node childNode = children.item(i);
2068
        	String childName = childNode.getNodeName();
2069

    
2070

    
2071
        	//following text
2072
        	followingText = null;
2073
        	if ( i + 1 < children.getLength()){
2074
            	Node followingTextNode = children.item(i +1);
2075
            	if (followingTextNode.getNodeName().equals("#text") && !followingTextNode.getTextContent().matches("\\s*") ){
2076
            		followingText = followingTextNode.getTextContent();
2077
            	}
2078
        	}
2079

    
2080
        	//traverse nodes
2081
            if (childName.equalsIgnoreCase("#text")) {
2082
                freetext = childNode.getTextContent().trim();
2083
                if (usedFollowingTextPrefix != null && freetext.startsWith(usedFollowingTextPrefix)){
2084
                	freetext = freetext.substring(usedFollowingTextPrefix.length());
2085
                }
2086
                usedFollowingTextPrefix = null;  //reset
2087
            }else if (childName.equalsIgnoreCase("tax:collection_event")) {
2088
                //                System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
2089
                extractMaterialsDirect(childNode, acceptedTaxon, refMods, "collection", currentMyName.getTaxonName());
2090
            }else if(childName.equalsIgnoreCase("tax:name")){
2091
                INonViralName nameToBeFilled;
2092
                //System.out.println("HANDLE FIRST NAME OF THE LIST");
2093
                if(!containsSynonyms){
2094
                	wasSynonym = false;
2095

    
2096
                	//System.out.println("I : "+i);
2097
                    currentMyName = new MyName(false);
2098
                    try {
2099
                        currentMyName = extractScientificName(childNode, refMods, followingText);
2100
                        treatmentMainName = currentMyName.getNewName();
2101
                        originalTreatmentName = currentMyName.getOriginalName();
2102

    
2103
                    } catch (TransformerFactoryConfigurationError e1) {
2104
                        throw new RuntimeException(e1);
2105
                    } catch (TransformerException e1) {
2106
                    	throw new RuntimeException(e1);
2107
                    }
2108

    
2109
                    if (currentMyName.getRank().equals(Rank.UNKNOWN_RANK()) || currentMyName.getRank().isLower(state2.getConfig().getMaxRank()) || currentMyName.getRank().equals(state2.getConfig().getMaxRank())){
2110
                        maxRankRespected=true;
2111

    
2112
                        nameToBeFilled=currentMyName.getTaxonName();
2113

    
2114
                        //   acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
2115
                        acceptedTaxon=currentMyName.getTaxon();
2116
                        //System.out.println("TreatmentName "+treatmentMainName+" - "+acceptedTaxon);
2117

    
2118

    
2119
                        boolean statusMatch=false;
2120
                        if(acceptedTaxon !=null ){
2121
                            acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2122
                            statusMatch=compareStatus(acceptedTaxon, statusType);
2123
                            //System.out.println("statusMatch: "+statusMatch);
2124
                        }
2125
                        if (acceptedTaxon ==null || (acceptedTaxon != null && !statusMatch)){
2126

    
2127
                            nameToBeFilled=currentMyName.getTaxonName();
2128
                            if (nameToBeFilled != null){
2129
                                if (!originalTreatmentName.isEmpty()) {
2130
                                    TaxonNameDescription td = TaxonNameDescription.NewInstance();
2131
                                    td.setTitleCache(originalTreatmentName, true);
2132
                                    nameToBeFilled.addDescription(td);
2133
                                }
2134

    
2135
                                if(statusType != null) {
2136
                                    nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
2137
                                }
2138
                                if(newNameStatus != null){
2139
                                	nameToBeFilled.setAppendedPhrase(newNameStatus);
2140
                                }
2141
                                sourceHandler.addSource(refMods, TaxonName.castAndDeproxy(nameToBeFilled));
2142

    
2143
                                if (nameToBeFilled.getNomenclaturalReference() == null) {
2144
                                    acceptedTaxon= Taxon.NewInstance(nameToBeFilled,refMods);
2145
                                    //System.out.println("NEW ACCEPTED HERE "+nameToBeFilled);
2146
                                }
2147
                                else {
2148
                                    acceptedTaxon= Taxon.NewInstance(nameToBeFilled,nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
2149
                                    //System.out.println("NEW ACCEPTED HERE2 "+nameToBeFilled);
2150
                                }
2151

    
2152
                                sourceHandler.addSource(refMods, acceptedTaxon);
2153

    
2154
                                if(!state2.getConfig().doKeepOriginalSecundum()) {
2155
                                    acceptedTaxon.setSec(state2.getConfig().getSecundum());
2156
                                    //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2157
                                    //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2158
                                }
2159

    
2160
                                if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2161
                                    setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2162
                                }
2163

    
2164

    
2165
                                importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2166
                                acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2167
                            }
2168

    
2169
                        }else{
2170
                            acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2171
                            Set<IdentifiableSource> sources = acceptedTaxon.getSources();
2172
                            boolean sourcelinked=false;
2173
                            for (IdentifiableSource source:sources){
2174
                                if (source.getCitation().getTitleCache().equalsIgnoreCase(refMods.getTitleCache())) {
2175
                                    sourcelinked=true;
2176
                                }
2177
                            }
2178
                            if (!state2.getConfig().doKeepOriginalSecundum()) {
2179
                                acceptedTaxon.setSec(state2.getConfig().getSecundum());
2180
                                //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2181
                                //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2182
                            }
2183
                            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2184

    
2185
                            if (!sourcelinked){
2186
                                sourceHandler.addSource(refMods, acceptedTaxon);
2187
                            }
2188
                            if (!sourcelinked || !state2.getConfig().doKeepOriginalSecundum()){
2189

    
2190
                                if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2191
                                    //FIXME are these identifiers really related to taxa, not names? Exiting LSIDs come from Zoobank, urn:lsid:biosci.ohio-state.edu:osuc_concepts:134826 (Ants)
2192
                                	setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2193
                                }
2194
                                importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2195
                            }
2196
                        }
2197
                    }else{
2198
                        maxRankRespected=false;
2199
                    }
2200
                    containsSynonyms=true;  //all folowing names are handled as synonyms
2201
                }else{
2202
                    try{
2203
                        extractSynonyms(childNode, acceptedTaxon, refMods, followingText);
2204
                        wasSynonym = true;
2205

    
2206
                    }catch(NullPointerException e){
2207
                        logger.warn("null pointer exception, the accepted taxon might be null");
2208
                    }
2209
                }
2210
                containsSynonyms=true;
2211
            }else if (childName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
2212
                reloadClassification();
2213
                //extract the References within the document
2214
                extractReferences(childNode,nametosave,acceptedTaxon,refMods);
2215
            }else if (childName.equalsIgnoreCase("tax:bibref")){
2216
            	logger.warn(childName + " still preliminary");
2217

    
2218
            	TaxonName currentName = currentMyName == null ? null : currentMyName.getTaxonName();
2219
            	boolean handled = addFollowingTextToName (currentName, childNode.getTextContent() );
2220
            	if (! handled){
2221
            		setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2222
            	}
2223
            }else{
2224
            	logger.warn(childName + " not yet handled");
2225
            }
2226
            if(!stringIsEmpty(freetext.trim())) {;
2227
                if (! freetext.matches("\\d\\.?")){
2228
                    TaxonName currentName = currentMyName == null ? null : currentMyName.getTaxonName();
2229
                	boolean handled = false;
2230
                	if (currentName != null && !wasSynonym){
2231
                		handled = addFollowingTextToName (currentName, childNode.getTextContent() );
2232
                	}
2233
                	if (! handled){
2234
                		setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2235
                	}
2236
                }
2237

    
2238
                 freetext = "";
2239
            }
2240

    
2241
        }
2242
        //importer.getClassificationService().saveOrUpdate(classification);
2243
        return acceptedTaxon;
2244
    }
2245

    
2246

    
2247

    
2248

    
2249
	/**
2250
     * @return
2251
     */
2252

    
2253
    private boolean compareStatus(TaxonBase<?> t, NomenclaturalStatusType statusType) {
2254
        //logger.info("compareStatus");
2255
        boolean statusMatch=false;
2256
        //found one taxon
2257
        Set<NomenclaturalStatus> status = t.getName().getStatus();
2258
        if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
2259
            for (NomenclaturalStatus st:status){
2260
                NomenclaturalStatusType stype = st.getType();
2261
                if (stype.toString().equalsIgnoreCase(statusType.toString())) {
2262
                    statusMatch=true;
2263
                }
2264
            }
2265
        }
2266
        else{
2267
            if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
2268
                statusMatch=true;
2269
            }
2270
        }
2271
        return statusMatch;
2272
    }
2273

    
2274
    /**
2275
     * @param acceptedTaxon: the current acceptedTaxon
2276
     * @param ref: the current reference extracted from the MODS
2277
     * @return the parent for the current accepted taxon
2278
     */
2279
    /*  private Taxon createParent(Taxon acceptedTaxon, Reference ref) {
2280
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2281

    
2282
        List<Rank> rankList = new ArrayList<Rank>();
2283
        rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
2284

    
2285
        List<String> rankListStr = new ArrayList<String>();
2286
        for (Rank r:rankList) {
2287
            rankListStr.add(r.toString());
2288
        }
2289
        String r="";
2290
        String s = acceptedTaxon.getTitleCache();
2291
        Taxon tax = null;
2292
        if(!skippQuestion){
2293
            int addTaxon = askAddParent(s);
2294
            logger.info("ADD TAXON: "+addTaxon);
2295
            if (addTaxon == 0 ){
2296
                Taxon tmp = askParent(acceptedTaxon, classification);
2297
                if (tmp == null){
2298
                    s = askSetParent(s);
2299
                    r = askRank(s,rankListStr);
2300

    
2301
                    TaxonName nameToBeFilled = null;
2302
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2303
                        nameToBeFilled = TaxonNameFactory.NewBotanicalInstance(null);
2304
                    }
2305
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2306
                        nameToBeFilled = TaxonNameFactory.NewZoologicalInstance(null);
2307
                    }
2308
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2309
                        nameToBeFilled = TaxonNameFactory.NewBacterialInstance(null);
2310
                    }
2311
                    nameToBeFilled.setTitleCache(s, true);
2312
                    nameToBeFilled.setRank(getRank(r), true);
2313

    
2314
                    tax = Taxon.NewInstance(nameToBeFilled, ref);
2315
                }
2316
                else{
2317
                    tax=tmp;
2318
                }
2319

    
2320
                createParent(tax, ref);
2321
                //            logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
2322
                classification.addParentChild(tax, acceptedTaxon, ref, null);
2323
            }
2324
            else{
2325
                classification.addChildTaxon(acceptedTaxon, ref, null);
2326
                tax=acceptedTaxon;
2327
            }
2328
        } else{
2329
            classification.addChildTaxon(acceptedTaxon, ref, null);
2330
            tax=acceptedTaxon;
2331
        }
2332
        //        logger.info("RETURN: "+tax );
2333
        return tax;
2334

    
2335
    }
2336

    
2337
     */
2338

    
2339

    
2340
    private MyName  extractScientificNameSynonym(Node name, Reference refMods, String followingText) throws TransformerFactoryConfigurationError, TransformerException {
2341
        //System.out.println("extractScientificNameSynonym");
2342
        logger.info("extractScientificNameSynonym");
2343
        String[] rankListToPrint_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2344
        List<String> rankListToPrint = new ArrayList<String>();
2345
        for (String r : rankListToPrint_tmp) {
2346
            rankListToPrint.add(r.toLowerCase());
2347
        }
2348

    
2349
        Rank rank = Rank.UNKNOWN_RANK();
2350
        NodeList children = name.getChildNodes();
2351
        String originalName="";
2352
        String fullName = "";
2353
        String newName="";
2354
        String identifier="";
2355
        HashMap<String, String> atomisedMap = new HashMap<String, String>();
2356
        List<String> atomisedName= new ArrayList<String>();
2357

    
2358
        String rankStr = "";
2359
        Rank tmpRank ;
2360

    
2361
        String status= extractStatus(children);
2362

    
2363
        for (int i=0;i<children.getLength();i++){
2364
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:xmldata")){
2365
                NodeList atom = children.item(i).getChildNodes();
2366
                for (int k=0;k<atom.getLength();k++){
2367
                    identifier = extractIdentifier(identifier, atom.item(k));
2368
                    tmpRank = null;
2369
                    rankStr = atom.item(k).getNodeName().toLowerCase();
2370
                    //                    logger.info("RANKSTR:*"+rankStr+"*");
2371
                    if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2372
                        rankStr=atom.item(k).getTextContent().trim();
2373
                        tmpRank = getRank(rankStr);
2374
                    }
2375
                    //                    if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2376
                    if (tmpRank != null){
2377
                        rank=tmpRank;
2378
                    }
2379
                    atomisedMap.put(rankStr.toLowerCase(),atom.item(k).getTextContent().trim());
2380
                }
2381
                addAtomisedNamesToMap(rankListToPrint, rank, atomisedName, atom);
2382
            }
2383
            if(children.item(i).getNodeName().equalsIgnoreCase("#text") && !StringUtils.isBlank(children.item(i).getTextContent())){
2384
                //                logger.info("name non atomised: "+children.item(i).getTextContent());
2385
                fullName = children.item(i).getTextContent().trim();
2386
                //                logger.info("fullname: "+fullName);
2387
            }
2388
        }
2389
        originalName=fullName;
2390
        fullName = cleanName(fullName, atomisedName);
2391
        namesMap.put(fullName,atomisedMap);
2392

    
2393
        String atomisedNameStr = getAtomisedNameStr(atomisedName);
2394

    
2395
        if (fullName != null){
2396
            //            System.out.println("fullname: "+fullName);
2397
            //            System.out.println("atomised: "+atomisedNameStr);
2398
            if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2399
                if (skippQuestion){
2400
                    //                    String defaultN = "";
2401
                    if (atomisedNameStr.length()>fullName.length()) {
2402
                        newName=atomisedNameStr;
2403
                    } else {
2404
                        if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2405
                            newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2406
                        } else {
2407
                            newName=fullName;
2408
                        }
2409
                    }
2410
                } else {
2411
                    newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2412
                }
2413
            } else {
2414
                newName=fullName;
2415
            }
2416
        }
2417
        //not really needed
2418
        //        rank = askForRank(newName, rank, nomenclaturalCode);
2419
        //        System.out.println("atomised: "+atomisedMap.toString());
2420

    
2421
        //        String[] names = new String[5];
2422
        MyName myname = new MyName(true);
2423

    
2424
        //System.out.println("Handle "+newName+ "(rank: "+rank+")");
2425
        //        System.out.println(atomisedMap.keySet());
2426
        fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2427
        myname.setOriginalName(fullName);
2428
        myname.setNewName(newName);
2429
        myname.setRank(rank);
2430
        myname.setIdentifier(identifier);
2431
        myname.setStatus(status);
2432
        myname.setSource(refMods);
2433

    
2434
        //        boolean higherAdded=false;
2435

    
2436

    
2437
        boolean parseNameManually=false;
2438
        INonViralNameParser<?> parser = NonViralNameParserImpl.NewInstance();
2439
        TaxonName nameToBeFilledTest ;
2440

    
2441
        //if selected the atomised version
2442
        if(newName==atomisedNameStr){
2443
            nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2444
            if (nameToBeFilledTest.hasProblem()){
2445
                addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2446
                nameToBeFilledTest = (TaxonName)parser.parseFullName(fullName, nomenclaturalCode, rank);
2447
                if (nameToBeFilledTest.hasProblem()){
2448
                    addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2449
                    parseNameManually=true;
2450
                }
2451
            }
2452
        }else{
2453
            nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2454
            if (nameToBeFilledTest.hasProblem()){
2455
                addProblemNameToFile("fullversion",fullName, nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2456
                nameToBeFilledTest = (TaxonName)parser.parseFullName(fullName, nomenclaturalCode,rank);
2457
                parseNameManually=true;
2458
                if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2459
                    addNameDifferenceToFile(originalName,atomisedNameStr);
2460
                }
2461
            }
2462
        }
2463

    
2464
        if(parseNameManually){
2465
            //System.out.println("DO IT MANUALLY");
2466
        	if (this.state2.getConfig().isUseOldUnparsedSynonymExtraction()){
2467
                createUnparsedSynonym(rank, newName, atomisedMap, myname);
2468
        	}else{
2469
        		createUnparsedSynonymNew(rank, newName, atomisedMap, myname, refMods);;
2470
        	}
2471
        } else{
2472
            //System.out.println("AUTOMATIC!");
2473
            //            createAtomisedTaxonString(newName, atomisedMap, myname);
2474
            myname.setParsedName(nameToBeFilledTest);
2475
            myname.buildTaxon();
2476
        }
2477
        //System.out.println("RETURN SYNONYM "+myname.getSyno().toString());
2478
        return myname;
2479
    }
2480

    
2481

    
2482
	/**
2483
     * @param name
2484
     * @throws TransformerFactoryConfigurationError
2485
     * @throws TransformerException
2486
     * @return a list of possible names
2487
     */
2488
    @SuppressWarnings({"rawtypes" })
2489
    private MyName extractScientificName(Node name, Reference refMods, String followingText) throws TransformerFactoryConfigurationError, TransformerException {
2490
        logger.info("extractScientificName");
2491

    
2492
        String[] rankListToPrintLowerCase_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2493
        List<String> rankListToPrint = Arrays.asList(rankListToPrintLowerCase_tmp);
2494

    
2495
        Rank rank = Rank.UNKNOWN_RANK();
2496
        NodeList children = name.getChildNodes();
2497
        String originalName = "";
2498
        String fullName = "";
2499
        String newName = "";
2500
        String identifier = "";
2501
        HashMap<String, String> atomisedMap = new HashMap<String, String>();
2502
        List<String> atomisedNameList= new ArrayList<String>();
2503

    
2504
        String status= extractStatus(children);
2505

    
2506
        for (int i=0;i<children.getLength();i++){
2507
        	Node nameChild = children.item(i);
2508
            if(nameChild.getNodeName().equalsIgnoreCase("tax:xmldata")){
2509
                NodeList xmlDataChildren = nameChild.getChildNodes();
2510
                for (int k=0;k<xmlDataChildren.getLength();k++){
2511
                	Node xmlDataChild = xmlDataChildren.item(k);
2512
                    identifier = extractIdentifier(identifier, xmlDataChild);
2513
                    String rankStr = xmlDataChild.getNodeName().toLowerCase();
2514
                    if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2515
                        rankStr=xmlDataChild.getTextContent().trim();
2516
                        Rank tmpRank = getRank(rankStr);
2517
                        if (tmpRank != null){
2518
                            rank=tmpRank;
2519
                        }
2520
                    }
2521
                    //                    if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2522

    
2523
                    atomisedMap.put(rankStr.toLowerCase(),xmlDataChild.getTextContent().trim());
2524
                }
2525
                addAtomisedNamesToMap(rankListToPrint, rank, atomisedNameList, xmlDataChildren);
2526
            }
2527
            else if(nameChild.getNodeName().equalsIgnoreCase("#text") && ! nameChild.getTextContent().matches("\\s*")){
2528
                //                logger.info("name non atomised: "+children.item(i).getTextContent());
2529
                fullName = nameChild.getTextContent().trim();
2530
                //                logger.info("fullname: "+fullName);
2531
            }
2532
        }
2533
        originalName=fullName;
2534
        fullName = cleanName(fullName, atomisedNameList);
2535
        namesMap.put(fullName,atomisedMap);
2536

    
2537
        String atomisedNameStr = getAtomisedNameStr(atomisedNameList);
2538

    
2539
        if (fullName != null){
2540
            if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2541
                if (skippQuestion){
2542
                    if (atomisedNameStr.length()>fullName.length()) {
2543
                        newName = atomisedNameStr;
2544
                    } else {
2545
                        if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2546
                            newName = askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
2547
                        } else {
2548
                            newName = fullName;
2549
                        }
2550
                    }
2551
                } else {
2552
                    newName=askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
2553
                }
2554
            } else {
2555
                newName=fullName;
2556
            }
2557
        }
2558
        //not really needed
2559
        //        rank = askForRank(newName, rank, nomenclaturalCode);
2560
        //        System.out.println("atomised: "+atomisedMap.toString());
2561

    
2562
        //        String[] names = new String[5];
2563
        MyName myname = new MyName(false);
2564

    
2565
        //System.out.println("\n\nBUILD "+newName+ "(rank: "+rank+")");
2566
        //        System.out.println(atomisedMap.keySet());
2567
        fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2568
        myname.setOriginalName(fullName);
2569
        myname.setNewName(newName);
2570

    
2571
        myname.setRank(rank);
2572
        myname.setIdentifier(identifier);
2573
        myname.setStatus(status);
2574
        myname.setSource(refMods);
2575

    
2576
        //        boolean higherAdded=false;
2577

    
2578

    
2579
        boolean parseNameManually=false;
2580
        INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2581
        TaxonName  nameToBeFilledTest = null;
2582

    
2583
        //if selected the atomised version
2584
        if(newName==atomisedNameStr){
2585
            nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2586
            if (nameToBeFilledTest.hasProblem()){
2587
        	    addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2588
                nameToBeFilledTest = (TaxonName)parser.parseFullName(fullName, nomenclaturalCode,rank);
2589
                if (nameToBeFilledTest.hasProblem()){
2590
                    addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2591
                    parseNameManually=true;
2592
                }
2593
            }
2594
        }else{
2595
            nameToBeFilledTest = parseWithExtension(parser, fullName , rank, followingText, atomisedMap);
2596
            if (nameToBeFilledTest.hasProblem()){
2597
                addProblemNameToFile("fullversion",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2598
                nameToBeFilledTest = (TaxonName)parser.parseFullName(fullName, nomenclaturalCode,rank);
2599
                parseNameManually=true;
2600
                if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2601
                    addNameDifferenceToFile(originalName,atomisedNameStr);
2602
                }
2603
            }
2604
        }
2605

    
2606
        //System.out.println("parseNameManually: "+parseNameManually);
2607
        if(parseNameManually){
2608
            createAtomisedTaxon(rank, newName, atomisedMap, myname);
2609
        }
2610
        else{
2611
            createAtomisedTaxonString(newName, atomisedMap, myname);
2612
            myname.setParsedName(nameToBeFilledTest);
2613
            //TODO correct handling of createIfNotExists
2614
           	myname.buildTaxon();
2615
        }
2616
        return myname;
2617

    
2618
    }
2619

    
2620
    private TaxonName parseWithExtension(INonViralNameParser parser, String atomisedNameStr, Rank rank, String followingText, HashMap<String, String> atomisedMap) {
2621
    	Object[] nameExtensionResult = getPossibleExtension(followingText, atomisedMap, nomenclaturalCode);
2622

    
2623
    	TaxonName name = (TaxonName)parser.parseFullName(atomisedNameStr, nomenclaturalCode, rank);
2624
    	if (nameExtensionResult != null && nameExtensionResult[0] != null){
2625
    		String ext = (String)nameExtensionResult[0];
2626
    		TaxonName extName = (TaxonName)parser.parseFullName(atomisedNameStr + " " + ext, nomenclaturalCode, rank);
2627
    		if (! extName.hasProblem()){
2628
    			name = extName;
2629
    			this.usedFollowingTextPrefix = ext;
2630
    			//TODO do we need to fill the atomisedMap at all?
2631
    			if ((Boolean)(nameExtensionResult[1])){
2632
    				//TODO
2633
    			}
2634
    			if ((Boolean)(nameExtensionResult[2])){
2635
    				//TODO BasionymYear etc.
2636
    				Integer origYear = name.getPublicationYear();
2637
    				if (origYear != null){
2638
        				atomisedMap.put(PUBLICATION_YEAR, origYear.toString());
2639
    				}
2640
    			}
2641
    		}
2642
    	}
2643
		return name;
2644
	}
2645

    
2646
	private Object[] getPossibleExtension(String followingText, HashMap<String, String> atomisedMap, NomenclaturalCode nomenclaturalCode) {
2647
		if (StringUtils.isBlank(followingText)){
2648
			return null;
2649
		}
2650

    
2651
    	boolean includeAuthor = true;
2652
    	boolean includeYear = false;
2653
		if (atomisedMap.containsKey("dwc:scientificnameauthorship")){
2654
			includeAuthor = false;
2655
		}
2656
    	if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2657
    		includeYear = true;
2658
    	}
2659
    	String patternStr = "";
2660
    	if (includeAuthor){
2661
    		patternStr += NonViralNameParserImplRegExBase.capitalWord;
2662
    	}
2663
    	if (includeYear){
2664
    		patternStr += "\\s*(,|\\s+)\\s*" + "(17|18|19|20)" + "\\d{2}" ;
2665
    	}
2666
    	String match = null;
2667
    	if (! patternStr.isEmpty()){
2668
    		Pattern pattern = Pattern.compile("^" + patternStr);
2669
    		Matcher matcher = pattern.matcher(followingText.trim());
2670
    		if (matcher.find()){
2671
    			match = matcher.group();
2672
    		}
2673
    	}
2674

    
2675
		return new Object[]{match, includeAuthor, includeYear};
2676
	}
2677

    
2678
	/**
2679
     * @param atomisedName
2680
     * @return
2681
     */
2682
    private String getAtomisedNameStr(List<String> atomisedName) {
2683
        //logger.info("getAtomisedNameStr");
2684
        String atomisedNameStr = StringUtils.join(atomisedName," ");
2685
        while(atomisedNameStr.contains("  ")) {
2686
            atomisedNameStr=atomisedNameStr.replace("  ", " ");
2687
        }
2688
        atomisedNameStr=atomisedNameStr.trim();
2689
        return atomisedNameStr;
2690
    }
2691

    
2692
    /**
2693
     * @param children
2694
     * @param status
2695
     * @return
2696
     */
2697
    private String extractStatus(NodeList children) {
2698
        logger.info("extractStatus");
2699
        String status="";
2700
        for (int i=0;i<children.getLength();i++){
2701
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:status") ||
2702
                    (children.item(i).getNodeName().equalsIgnoreCase("tax:namePart") &&
2703
                            children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("status"))){
2704
                status = children.item(i).getTextContent().trim();
2705
            }
2706
        }
2707
        return status;
2708
    }
2709

    
2710
    /**
2711
     * @param identifier
2712
     * @param atom
2713
     * @param k
2714
     * @return
2715
     */
2716
    private String extractIdentifier(String identifier, Node atom) {
2717
        //logger.info("extractIdentifier");
2718
        if (atom.getNodeName().equalsIgnoreCase("tax:xid")){
2719
            try{
2720
                identifier = atom.getAttributes().getNamedItem("identifier").getNodeValue();
2721
            }catch(Exception e){
2722
                System.out.println("pb with identifier, maybe empty");
2723
            }
2724
            try{
2725
                identifier+="__"+atom.getAttributes().getNamedItem("source").getNodeValue();
2726
            }catch(Exception e){
2727
                System.out.println("pb with identifier, maybe empty");
2728
            }
2729
        }
2730
        return identifier;
2731
    }
2732

    
2733
    /**
2734
     * @param rankListToPrint
2735
     * @param rank
2736
     * @param atomisedName
2737
     * @param atom
2738
     */
2739
    private void addAtomisedNamesToMap(List<String> rankListToPrint, Rank rank, List<String> atomisedName, NodeList atom) {
2740
        logger.info("addAtomisedNamesToMap");
2741
        for (int k=0;k<atom.getLength();k++){
2742
        	Node node = atom.item(k);
2743
        	String nodeName = node.getNodeName();
2744
            if (! nodeName.equalsIgnoreCase("dwc:taxonRank") ) {  //rank has been handled in higher method
2745
                if (nodeName.equalsIgnoreCase("dwc:subgenus") || nodeName.equalsIgnoreCase("dwcranks:subgenus")) {
2746
                    atomisedName.add("("+ node.getTextContent().trim()+")");
2747
                } else if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet") || nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2748
                       	if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet")){
2749
                            atomisedName.add("var. "+node.getTextContent().trim());
2750
                        }else if(nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2751
                            atomisedName.add("subsp. "+atom.item(k).getTextContent().trim());
2752
                        }
2753
                } else if(rankListToPrint.contains(nodeName.toLowerCase())) {
2754
                    atomisedName.add(node.getTextContent().trim());
2755
                } else{
2756
                    if (rank.isHigher(Rank.GENUS()) && (nodeName.indexOf("dwcranks:")>-1 || nodeName.indexOf("dwc:Family")>-1)) {
2757
                        atomisedName.add(node.getTextContent().trim());
2758
                    }else if (nodeName.equals("#text")){
2759
                    	String text = node.getTextContent();
2760
                    	if (StringUtils.isNotBlank(text)){
2761
                    		//TODO handle text
2762
                    		logger.warn("name xmldata contains text. This is unhandled");
2763
                    	}
2764
                    }else if (nodeName.matches("(?i)(dwc:Kingdom|dwc:Class|dwc:Order|dwc:Family)")){
2765
                    	//we currently do not use higher ranks information
2766
                    }else{
2767
                    	//TODO handle unhandled node
2768
                    	logger.warn("Unhandled node: " + nodeName);
2769
                    }
2770
                }
2771
            }
2772
        }
2773
    }
2774

    
2775
    /**
2776
     * @param fullName
2777
     * @param atomisedName
2778
     * @return
2779
     */
2780
    private String cleanName(String name, List<String> atomisedName) {
2781
        //logger.info("cleanName");
2782
        String fullName =name;
2783
        if (fullName != null){
2784
            fullName = fullName.replace("( ", "(");
2785
            fullName = fullName.replace(" )",")");
2786

    
2787
            if (fullName.trim().isEmpty()){
2788
                fullName=StringUtils.join(atomisedName," ");
2789
            }
2790

    
2791
            while(fullName.contains("  ")) {
2792
                fullName=fullName.replace("  ", " ");
2793
                //            logger.info("while");
2794
            }
2795
            fullName=fullName.trim();
2796
        }
2797
        return fullName;
2798
    }
2799

    
2800
    /**
2801
     * @param rank
2802
     * @param fullName
2803
     * @param atomisedMap
2804
     * @param myname
2805
     * @return
2806
     */
2807
    private String extractAuthorFromNames(Rank rank, String name, HashMap<String, String> atomisedMap, MyName myname) {
2808
        logger.info("extractAuthorFromNames");
2809
        String fullName=name;
2810
        if (atomisedMap.get("dwc:scientificnameauthorship") == null && fullName!=null){
2811
            //            System.out.println("rank : "+rank.toString());
2812
            if(rank.isHigher(Rank.SPECIES())){
2813
                try{
2814
                    String author=null;
2815
                    if(atomisedMap.get("dwcranks:subgenus") != null) {
2816
                        author = fullName.split(atomisedMap.get("dwcranks:subgenus"))[1].trim();
2817
                    }
2818
                    if(atomisedMap.get("dwc:subgenus") != null) {
2819
                        author = fullName.split(atomisedMap.get("dwc:subgenus"))[1].trim();
2820
                    }
2821
                    if(author == null) {
2822
                        if(atomisedMap.get("dwc:genus") != null) {
2823
                            author = fullName.split(atomisedMap.get("dwc:genus"))[1].trim();
2824
                        }
2825
                    }
2826
                    if(author != null){
2827
                        fullName = fullName.substring(0, fullName.indexOf(author));
2828
                        author=author.replaceAll(",","").trim();
2829
                        myname.setAuthor(author);
2830
                    }
2831
                }catch(Exception e){
2832
                    //could not extract the author
2833
                }
2834
            }
2835
            if(rank.equals(Rank.SPECIES())){
2836
                try{
2837
                    String author=null;
2838
                    if(author == null) {
2839
                        if(atomisedMap.get("dwc:species") != null) {
2840
                            String[] t = fullName.split(atomisedMap.get("dwc:species"));
2841
                            //                            System.out.println("NB ELEMENTS "+t.length +"fullName "+fullName+", "+atomisedMap.get("dwc:species"));
2842
                            author = fullName.split(atomisedMap.get("dwc:species"))[1].trim();
2843
                            //                            System.out.println("AUTEUR "+author);
2844
                        }
2845
                    }
2846
                    if(author != null){
2847
                        fullName = fullName.substring(0, fullName.indexOf(author));
2848
                        author=author.replaceAll(",","").trim();
2849
                        myname.setAuthor(author);
2850
                    }
2851
                }catch(Exception e){
2852
                    //could not extract the author
2853
                }
2854
            }
2855
        }else{
2856
            myname.setAuthor(atomisedMap.get("dwc:scientificnameauthorship"));
2857
        }
2858
        return fullName;
2859
    }
2860

    
2861
    /**
2862
     * @param newName
2863
     * @param atomisedMap
2864
     * @param myname
2865
     */
2866
    private void createAtomisedTaxonString(String newName, HashMap<String, String> atomisedMap, MyName myname) {
2867
        logger.info("createAtomisedTaxonString "+atomisedMap);
2868
        if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
2869
            myname.setFamilyStr(atomisedMap.get("dwc:family"));
2870
        }
2871
        if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY())){
2872
            myname.setSubfamilyStr(atomisedMap.get("dwcranks:subfamily"));
2873
        }
2874
        if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
2875
            myname.setTribeStr(atomisedMap.get("dwcranks:tribe"));
2876
        }
2877
        if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
2878
            myname.setSubtribeStr(atomisedMap.get("dwcranks:subtribe"));
2879
        }
2880
        if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
2881
            myname.setGenusStr(atomisedMap.get("dwc:genus"));
2882
        }
2883
        if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2884
            myname.setSubgenusStr(atomisedMap.get("dwcranks:subgenus"));
2885
        }
2886
        if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2887
            myname.setSubgenusStr(atomisedMap.get("dwc:subgenus"));
2888
        }
2889
        if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
2890
            String n=newName;
2891
            if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2892
                n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2893
                n=n.replace("subsp.","");
2894
            }
2895
            if(atomisedMap.get("dwc:subspecies") != null) {
2896
                n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2897
                n=n.replace("subsp.","");
2898
            }
2899
            if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2900
                n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2901
                n=n.replace("var.","");
2902
                n=n.replace("v.","");
2903
            }
2904
            if(atomisedMap.get("dwcranks:formepithet") != null) {
2905
                //TODO
2906
                System.out.println("TODO FORMA");
2907
                n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
2908
                n=n.replace("forma","");
2909
            }
2910
            n=n.trim();
2911
            String author = myname.getAuthor();
2912
            if(n.split(" ").length>2){
2913

    
2914
                String n2=n.split(" ")[0]+" "+n.split(" ")[1];
2915
                String a= "";
2916
                try{
2917
                    a=n.split(n2)[1].trim();
2918
                }catch(Exception e){
2919
                    logger.info("no author in "+n+"?");}
2920

    
2921
                myname.setAuthor(a);
2922
                //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
2923
                n=n2;
2924

    
2925
            }
2926

    
2927
            myname.setSpeciesStr(atomisedMap.get("dwc:species"));
2928
            myname.setAuthor(author);
2929
        }
2930
        if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
2931
            myname.setSubspeciesStr(atomisedMap.get("dwc:subspecies"));
2932
        }
2933
        if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
2934
            myname.setSubspeciesStr(atomisedMap.get("dwc:infraspecificepithet"));
2935
        }
2936
        if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
2937
            myname.setVarietyStr(atomisedMap.get("dwcranks:varietyepithet"));
2938
        }
2939
        if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
2940
            myname.setFormStr(atomisedMap.get("dwcranks:formepithet"));
2941
        }
2942
        if (atomisedMap.get(PUBLICATION_YEAR) != null){
2943
        	myname.setPublicationYear(Integer.valueOf(atomisedMap.get(PUBLICATION_YEAR)));
2944
        }
2945
    }
2946

    
2947
    /**
2948
     * @see #createUnparsedSynonymNew(Rank, String, HashMap, MyName)
2949
     * @param rank
2950
     * @param newName
2951
     * @param atomisedMap
2952
     * @param myname
2953
     */
2954
    private void createUnparsedSynonym(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
2955
        logger.info("createSynonym");
2956
        //System.out.println("createsynonym");
2957
        if(rank.equals(Rank.UNKNOWN_RANK())){
2958
            myname.setNotParsableTaxon(newName);
2959
        }else{
2960
	        if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY()) && rank.equals(Rank.FAMILY())){
2961
	            myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
2962
	        }
2963
	        if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY()) && rank.equals(Rank.SUBFAMILY())){
2964
	            myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
2965
	        }
2966
	        if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE()) && rank.equals(Rank.TRIBE())){
2967
	            myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
2968
	        }
2969
	        if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE()) && rank.equals(Rank.SUBTRIBE())){
2970
	            myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
2971
	        }
2972
	        if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS()) && rank.equals(Rank.GENUS())){
2973
	            myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
2974
	        }
2975
	        if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
2976
	            myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
2977
	        }
2978
	        if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
2979
	            myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
2980
	        }
2981
	        if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES()) && rank.equals(Rank.SPECIES())){
2982
	            String n=newName;
2983
	            if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2984
	                n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2985
	                n=n.replace("subsp.","");
2986
	            }
2987
	            if(atomisedMap.get("dwc:subspecies") != null) {
2988
	                n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2989
	                n=n.replace("subsp.","");
2990
	            }
2991
	            if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2992
	                n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2993
	                n=n.replace("var.","");
2994
	                n=n.replace("v.","");
2995
	            }
2996
	            if(atomisedMap.get("dwcranks:formepithet") != null) {
2997
	                //TODO
2998
	                //System.out.println("TODO FORMA");
2999
	                n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3000
	                n=n.replace("forma","");
3001
	            }
3002
	            n=n.trim();
3003
	            String author = myname.getAuthor();
3004
	            if(n.split(" ").length>2){
3005

    
3006
	                String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3007
	                String a="";
3008
	                try{
3009
	                    a= n.split(n2)[1].trim();
3010
	                }catch(Exception e){logger.info("no author in "+n);}
3011
	                myname.setAuthor(a);
3012
	                //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3013
	                n=n2;
3014

    
3015
	            }
3016
	            Taxon species = myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank);
3017
	            myname.setSpecies(species);
3018
	            myname.setAuthor(author);
3019
	        }
3020
	        if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3021
	            myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3022
	        }
3023
	        if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3024
	            myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3025
	        }
3026
	        if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY()) && rank.equals(Rank.VARIETY())){
3027
	            myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3028
	        }
3029
	        if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM()) && rank.equals(Rank.FORM())){
3030
	            myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3031
	        }
3032
        }
3033

    
3034
    }
3035

    
3036

    
3037
    /**
3038
     * @param refMods
3039
     * @see #createUnparsedSynonym(Rank, String, HashMap, MyName)
3040
     * the original TaxonXImport extracted Synonyms by creating acc Taxa with partial names
3041
     * I (AM) do not understand this but don't want to destroy code which maybe works in some cases) there
3042
     * I created this switch for old
3043
     * for Spiders the new version is preferred
3044
     */
3045
    private void createUnparsedSynonymNew(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname, Reference refMods) {
3046
        logger.info("createSynonym");
3047

    
3048
        INonViralName nameToBeFilled = this.getNonViralNameAccNomenclature();
3049
        //System.out.println("createsynonym");
3050
        if(rank.equals(Rank.UNKNOWN_RANK())){
3051
            //TODO
3052
        	myname.setNotParsableTaxon(newName);
3053

    
3054
        	nameToBeFilled.setTitleCache(newName, true);
3055
        }else{
3056
        	if(atomisedMap.get("dwc:genus") != null ){
3057
    			nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwc:genus"));
3058
	        }
3059
        	if (rank.isSupraGeneric()){
3060
        		if (atomisedMap.get("dwcranks:subtribe") != null ){
3061
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subtribe"));
3062
    	        }else if (atomisedMap.get("dwcranks:subtribe") != null ){
3063
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subtribe"));
3064
    	        }else if (atomisedMap.get("dwcranks:tribe") != null ){
3065
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:tribe"));
3066
    	        }else if (atomisedMap.get("dwcranks:subfamily") != null ){
3067
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subfamily"));
3068
    	        }else if (atomisedMap.get("dwc:family") != null ){
3069
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwc:family"));
3070
        	    }else{
3071
        	    	logger.warn("Supra generic rank not yet handled or atomisation not available");
3072
        	    }
3073
        	}
3074
        	if (atomisedMap.get("dwcranks:subgenus") != null){
3075
        		nameToBeFilled.setInfraGenericEpithet(atomisedMap.get("dwcranks:subgenus"));
3076
        	}
3077
        	if (atomisedMap.get("dwc:subgenus") != null){
3078
        		nameToBeFilled.setInfraGenericEpithet(atomisedMap.get("dwc:subgenus"));
3079
        	}
3080
        	if (atomisedMap.get("dwc:species") != null){
3081
        		nameToBeFilled.setSpecificEpithet(atomisedMap.get("dwc:species"));
3082
        	}
3083
        	if (atomisedMap.get("dwcranks:formepithet") != null){
3084
        		nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwcranks:formepithet"));
3085
        	}else if (atomisedMap.get("dwcranks:varietyepithet") != null){
3086
        		nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwcranks:varietyepithet"));
3087
        	}else if (atomisedMap.get("dwc:infraspecificepithet") != null){
3088
        		nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwc:infraspecificepithet"));
3089
        	}else if (atomisedMap.get("dwc:subspecies") != null){
3090
        		nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwc:subspecies"));
3091
        	}
3092
            Reference sec = sourceUrlRef;
3093
            if(!state2.getConfig().doKeepOriginalSecundum()){
3094
                sec = state2.getConfig().getSecundum();
3095
            }
3096
        	Synonym syn = Synonym.NewInstance(nameToBeFilled, sec);
3097
//        	sourceHandler.addSource(refMods, syn);
3098
        	myname.setSyno(syn);
3099
        	myname.setSynonym(true);
3100
        }
3101
	}
3102

    
3103
    /**
3104
     * @param rank
3105
     * @param newName
3106
     * @param atomisedMap
3107
     * @param myname
3108
     */
3109
    private void createAtomisedTaxon(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
3110
        logger.info("createAtomisedTaxon "+atomisedMap);
3111
        if(rank.equals(Rank.UNKNOWN_RANK())){
3112
            myname.setNotParsableTaxon(newName);
3113
        }
3114
        else{
3115
            if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
3116
                myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
3117
            }
3118
            if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY())){
3119
                myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
3120
            }
3121
            if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
3122
                myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
3123
            }
3124
            if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
3125
                myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
3126
            }
3127
            if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
3128
                myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
3129
            }
3130
            if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3131
                myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
3132
            }
3133
            if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3134
                myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
3135
            }
3136
            if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
3137
                String n=newName;
3138
                if(atomisedMap.get("dwc:infraspecificepithet") != null) {
3139
                    n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
3140
                    n=n.replace("subsp.","");
3141
                }
3142
                if(atomisedMap.get("dwc:subspecies") != null) {
3143
                    n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
3144
                    n=n.replace("subsp.","");
3145
                }
3146
                if(atomisedMap.get("dwcranks:varietyepithet") != null) {
3147
                    n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
3148
                    n=n.replace("var.","");
3149
                    n=n.replace("v.","");
3150
                }
3151
                if(atomisedMap.get("dwcranks:formepithet") != null) {
3152
                    //TODO
3153
                    //System.out.println("TODO FORMA");
3154
                    n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3155
                    n=n.replace("forma","");
3156
                }
3157
                n=n.trim();
3158
                String author = myname.getAuthor();
3159
                if(n.split(" ").length>2){
3160
                    String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3161
                    String a="";
3162
                    try{
3163
                        a= n.split(n2)[1].trim();
3164
                    }catch(Exception e){logger.info("no author  in "+n);}
3165
                    myname.setAuthor(a);
3166
                    //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3167
                    n=n2;
3168

    
3169
                }
3170

    
3171
                myname.setSpecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank));
3172
                myname.setAuthor(author);
3173
            }
3174
            if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3175
                myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3176
            }
3177
            if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3178
                myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3179
            }
3180
            if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
3181
                myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3182
            }
3183
            if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
3184
                myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3185
            }
3186
        }
3187
    }
3188

    
3189
    /**
3190
     * @return
3191
     */
3192
    private boolean checkRankValidForImport(Rank currentRank) {
3193
        //logger.info("checkRankValidForImport");
3194
        return currentRank.isLower(state2.getConfig().getMaxRank()) || currentRank.equals(state2.getConfig().getMaxRank());
3195
    }
3196

    
3197

    
3198

    
3199
    /**
3200
     * @param classification2
3201
     */
3202
    public void updateClassification(Classification classification2) {
3203
        //logger.info("updateClassification");
3204
        classification = classification2;
3205
    }
3206

    
3207

    
3208

    
3209
    public class MyName {
3210
        /**
         * Creates an empty name holder.
         *
         * @param isSynonym {@code true} if the name is to be handled as a synonym,
         *                  {@code false} if it is to be handled as an accepted taxon
         */
        public MyName(boolean isSynonym) {
            this.isSynonym = isSynonym;
        }
3217

    
3218
        String originalName="";
3219
        String newName="";
3220
        Rank rank=Rank.UNKNOWN_RANK();
3221
        String identifier="";
3222
        String status="";
3223
        String author=null;
3224

    
3225
        TaxonName taxonName;
3226

    
3227
        Reference refMods ;
3228

    
3229
        Taxon family,subfamily,tribe,subtribe,genus,subgenus,species,subspecies, variety,form;
3230
        INonViralName familyName, subfamilyName, tribeName,subtribeName,genusName,subgenusName,speciesName,subspeciesName;
3231
        String familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr;
3232
        Integer publicationYear;
3233

    
3234

    
3235
		Taxon higherTaxa;
3236
        Rank higherRank;
3237
        private Taxon taxon;
3238
        private Synonym syno;
3239

    
3240
        /**
3241
         * @return the syno
3242
         */
3243
        public Synonym getSyno() {
3244
            return syno;
3245
        }
3246

    
3247
        @Override
3248
        public String toString(){
3249
            List<String> tot=new ArrayList<String>();
3250
            String[] n= {familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr};
3251
            for (String elt:n){
3252
                if (!StringUtils.isEmpty(elt)) {
3253
                    tot.add(elt);
3254
                } else {
3255
                    tot.add("*");
3256
                }
3257
            }
3258
            return StringUtils.join(tot," ");
3259
        }
3260
        /**
3261
         * @param syno the syno to set
3262
         */
3263
        public void setSyno(Synonym syno) {
3264
            this.syno = syno;
3265
        }
3266

    
3267
        boolean isSynonym=false;
3268

    
3269
        /**
3270
         * @return the isSynonym
3271
         */
3272
        public boolean isSynonym() {
3273
            return isSynonym;
3274
        }
3275

    
3276
        /**
3277
         * @param isSynonym the isSynonym to set
3278
         */
3279
        public void setSynonym(boolean isSynonym) {
3280
            this.isSynonym = isSynonym;
3281
        }
3282

    
3283
        public void setSource(Reference re){
3284
            refMods=re;
3285
        }
3286

    
3287
        /**
3288
         * @param string
3289
         */
3290
        public void setFormStr(String string) {
3291
            this.formStr=string;
3292

    
3293
        }
3294
        /**
3295
         * @param string
3296
         */
3297
        public void setVarietyStr(String string) {
3298
            this.varietyStr=string;
3299

    
3300
        }
3301
        /**
3302
         * @param string
3303
         */
3304
        public void setSubspeciesStr(String string) {
3305
            this.subspeciesStr=string;
3306

    
3307
        }
3308
        /**
3309
         * @param string
3310
         */
3311
        public void setSpeciesStr(String string) {
3312
            this.speciesStr=string;
3313

    
3314
        }
3315
        /**
3316
         * @param string
3317
         */
3318
        public void setSubgenusStr(String string) {
3319
            this.subgenusStr=string;
3320

    
3321
        }
3322
        /**
3323
         * @param string
3324
         */
3325
        public void setGenusStr(String string) {
3326
            this.genusStr=string;
3327

    
3328
        }
3329
        /**
3330
         * @param string
3331
         */
3332
        public void setSubtribeStr(String string) {
3333
            this.subtribeStr=string;
3334

    
3335
        }
3336
        /**
3337
         * @param string
3338
         */
3339
        public void setTribeStr(String string) {
3340
            this.tribeStr=string;
3341

    
3342
        }
3343
        /**
3344
         * @param string
3345
         */
3346
        public void setSubfamilyStr(String string) {
3347
            this.subfamilyStr=string;
3348

    
3349
        }
3350
        /**
3351
         * @param string
3352
         */
3353
        public void setFamilyStr(String string) {
3354
            this.familyStr=string;
3355

    
3356
        }
3357
        /**
3358
         * @return the familyStr
3359
         */
3360
        public String getFamilyStr() {
3361
            return familyStr;
3362
        }
3363
        /**
3364
         * @return the subfamilyStr
3365
         */
3366
        public String getSubfamilyStr() {
3367
            return subfamilyStr;
3368
        }
3369
        /**
3370
         * @return the tribeStr
3371
         */
3372
        public String getTribeStr() {
3373
            return tribeStr;
3374
        }
3375
        /**
3376
         * @return the subtribeStr
3377
         */
3378
        public String getSubtribeStr() {
3379
            return subtribeStr;
3380
        }
3381
        /**
3382
         * @return the genusStr
3383
         */
3384
        public String getGenusStr() {
3385
            return genusStr;
3386
        }
3387
        /**
3388
         * @return the subgenusStr
3389
         */
3390
        public String getSubgenusStr() {
3391
            return subgenusStr;
3392
        }
3393
        /**
3394
         * @return the speciesStr
3395
         */
3396
        public String getSpeciesStr() {
3397
            return speciesStr;
3398
        }
3399
        /**
3400
         * @return the subspeciesStr
3401
         */
3402
        public String getSubspeciesStr() {
3403
            return subspeciesStr;
3404
        }
3405
        /**
3406
         * @return the formStr
3407
         */
3408
        public String getFormStr() {
3409
            return formStr;
3410
        }
3411
        /**
3412
         * @return the varietyStr
3413
         */
3414
        public String getVarietyStr() {
3415
            return varietyStr;
3416
        }
3417

    
3418
        public Integer getPublicationYear() {
3419
			return publicationYear;
3420
		}
3421

    
3422
		public void setPublicationYear(Integer publicationYear) {
3423
			this.publicationYear = publicationYear;
3424
		}
3425

    
3426
        /**
3427
         * @param newName2
3428
         */
3429
        public void setNotParsableTaxon(String newName2) {
3430
            //takes too much time
3431
            //            List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3432

    
3433
            NomenclaturalStatusType statusType = null;
3434
            if (!getStatus().isEmpty()){
3435
                try {
3436
                    statusType = nomStatusString2NomStatus(getStatus());
3437
                } catch (UnknownCdmTypeException e) {
3438
                    addProblematicStatusToFile(getStatus());
3439
                    logger.warn("Problem with status");
3440
                }
3441
            }
3442
            List<TaxonBase> tmpList = new ArrayList<>();
3443

    
3444
            Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitleWithRestrictions(TaxonBase.class, newName2, MatchMode.BEGINNING, null, null, null, null, null);
3445
            tmpList.addAll(taxontest.getRecords());
3446

    
3447
            //logger.info("tmpList returned: "+tmpList.size());
3448

    
3449

    
3450
            INonViralName identicName = null;
3451
            boolean foundIdentic=false;
3452
            TaxonBase<?> tmpTaxonBase=null;
3453
            //            Taxon tmpPartial=null;
3454
            for (TaxonBase<?> tmpb:tmpList){
3455
                if(tmpb !=null){
3456
                    TaxonName tnb =  tmpb.getName();
3457
                    Rank crank=null;
3458
                    if (tnb != null){
3459
                        if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(newName2) ){
3460
                            crank =tnb.getRank();
3461
                            if (crank !=null && rank !=null){
3462
                                if (crank.equals(rank)){
3463
                                	identicName = tnb;
3464
                                	if (isSynonym && tmpb.isInstanceOf(Synonym.class) || !isSynonym && tmpb.isInstanceOf(Taxon.class)){
3465
                                		foundIdentic=true;
3466
                                		tmpTaxonBase=tmpb;
3467
                               			break;
3468
                                	}
3469
                                }
3470
                            }
3471
                        }
3472
                    }
3473
                }
3474
            }
3475
            boolean statusMatch=false;
3476
            boolean appendedMatch=false;
3477
            if(tmpTaxonBase !=null && foundIdentic){
3478
                statusMatch=compareStatus(tmpTaxonBase, statusType);
3479
                if (!getStatus().isEmpty() && ! (tmpTaxonBase.getAppendedPhrase() == null)) {
3480
                    appendedMatch=tmpTaxonBase.getAppendedPhrase().equals(getStatus());
3481
                }
3482
                if (getStatus().isEmpty() && tmpTaxonBase.getAppendedPhrase() == null) {
3483
                    appendedMatch=true;
3484
                }
3485

    
3486
            }
3487
            if ((tmpTaxonBase == null || !foundIdentic) ||  (tmpTaxonBase != null && !statusMatch) ||  (tmpTaxonBase != null && !appendedMatch && !statusMatch)){
3488

    
3489
            	INonViralName tnb;
3490
            	if (identicName == null){
3491
            		tnb = getNonViralNameAccNomenclature();
3492
            		tnb.setRank(rank);
3493

    
3494
	                if(statusType != null) {
3495
	                    tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3496
	                }
3497
	                if(StringUtils.isNotBlank(getStatus())) {
3498
	                    tnb.setAppendedPhrase(getStatus());
3499
	                }
3500
	                tnb.setTitleCache(newName2,true);
3501
	                tmpTaxonBase = findMatchingTaxon(tnb,refMods);
3502
	            }else{
3503
            		tnb = identicName;
3504
            	}
3505

    
3506
                if(tmpTaxonBase==null){
3507
                    tmpTaxonBase = isSynonym ? Synonym.NewInstance(tnb, refMods) : Taxon.NewInstance(tnb, refMods);
3508
                    if(!state2.getConfig().doKeepOriginalSecundum()) {
3509
                        tmpTaxonBase.setSec(state2.getConfig().getSecundum());
3510
                    }
3511
                    //tmptaxonbase.setSec(refMods);
3512
                    if(!isSynonym) {
3513
                        classification.addChildTaxon((Taxon)tmpTaxonBase, null, null);
3514
                        sourceHandler.addSource(refMods, (Taxon)tmpTaxonBase);
3515
                    }
3516
                }
3517
            }
3518

    
3519
            tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3520
            if (author != null) {
3521
                if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
3522
                    setLSID(getIdentifier(), tmpTaxonBase);
3523
                    importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3524
                    tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3525
                }
3526
            }
3527
            TaxonName tnb = CdmBase.deproxy(tmpTaxonBase.getName(), TaxonName.class);
3528

    
3529
            if(!isSynonym) {
3530
                this.taxon=(Taxon)tmpTaxonBase;
3531
            } else {
3532
                if (tmpTaxonBase instanceof Taxon){
3533
                	logger.warn("Incorrect status");
3534
                }
3535
            	this.syno=(Synonym)tmpTaxonBase;
3536
            }
3537

    
3538
            taxonName = tnb;
3539

    
3540
        }
3541

    
3542
        /**
3543
         *
3544
         */
3545
        public void buildTaxon() {
3546
            //System.out.println("BUILD TAXON");
3547
            logger.info("buildTaxon");
3548
            NomenclaturalStatusType statusType = null;
3549
            if (!getStatus().isEmpty()){
3550
            	status = getStatus();
3551
            	String newNameStatus = newNameStatus(status);
3552
            	if (newNameStatus != null){
3553
            		taxonName.setAppendedPhrase(newNameStatus);
3554
            	}else{
3555
            		try {
3556
            			statusType = nomStatusString2NomStatus(getStatus());
3557
            			taxonName.addStatus(NomenclaturalStatus.NewInstance(statusType));
3558
            		} catch (UnknownCdmTypeException e) {
3559
            			addProblematicStatusToFile(getStatus());
3560
            			logger.warn("Problem with status");
3561
            		}
3562
            	}
3563
            }
3564
            importer.getNameService().save(taxonName);
3565

    
3566
            TaxonBase<?> tmpTaxonBase;
3567
            if (!isSynonym) {
3568
                tmpTaxonBase =Taxon.NewInstance(taxonName, refMods); //sec set null
3569
            }
3570
            else {
3571
                tmpTaxonBase =Synonym.NewInstance(taxonName, refMods); //sec set null
3572
            }
3573
            boolean exist = false;
3574
            if (!isSynonym){
3575
	            for (TaxonNode node : classification.getAllNodes()){
3576
	                try{
3577
	                	Taxon nodeTaxon = node.getTaxon();
3578
	                	boolean titleMatches = nodeTaxon.getTitleCache().equalsIgnoreCase(tmpTaxonBase.getTitleCache());
3579
	                	boolean nomStatusMatches = compareStatus(node.getTaxon(), statusType);
3580
	                	boolean nodeNameReplaceable = checkNodeNameReplaceable(nodeTaxon, tmpTaxonBase);
3581
	                    if(titleMatches && nomStatusMatches) {
3582
	                    	if (!isSynonym) {
3583
	                    		tmpTaxonBase=CdmBase.deproxy(nodeTaxon, TaxonBase.class);
3584
	                            exist =true;
3585
	                        } else {
3586
	                            logger.info("Found the same name but from another type (taxon/synonym)");
3587
	                            TaxonName existingTnb = getTaxon().getName();
3588
                                tmpTaxonBase = Synonym.NewInstance(existingTnb, refMods);
3589
                                importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3590
                                exist =true;
3591
                            }
3592
	                    }else if (nodeNameReplaceable){
3593
	                    	nodeTaxon.setName(tmpTaxonBase.getName());
3594
	                    	tmpTaxonBase = nodeTaxon;
3595
	                    	exist = true;
3596
	                    }
3597
	                }catch(NullPointerException n){logger.warn(" A taxon is either null or its titlecache is null - ignore it?");}
3598
	            }
3599
            }
3600
            if (!exist){
3601

    
3602
                boolean insertAsExisting =false;
3603
                List<Taxon> existingTaxons=new ArrayList<Taxon>();
3604
                try {
3605
                    existingTaxons = getMatchingTaxa(taxonName);
3606
                } catch (Exception e1) {
3607
                    e1.printStackTrace();
3608
                }
3609
                double similarityScore=0.0;
3610
                double similarityAuthor=-1;
3611
                String author1="";
3612
                String author2="";
3613
                String t1="";
3614
                String t2="";
3615
                for (Taxon bestMatchingTaxon : existingTaxons){
3616
                    //System.out.println("tnbase "+taxonname.getTitleCache());
3617
                    //System.out.println("bestex "+bestMatchingTaxon.getTitleCache());
3618
                    if(taxonName.getAuthorshipCache()!=null) {
3619
                    	author1=taxonName.getAuthorshipCache();
3620
                    }
3621
                    try {
3622
                        if(bestMatchingTaxon.getName().getAuthorshipCache()!=null) {
3623
                            author2=bestMatchingTaxon.getName().getAuthorshipCache();
3624
                        }
3625
                    } catch (Exception e) {
3626
                        // TODO Auto-generated catch block
3627
                        e.printStackTrace();
3628
                    }
3629
                    try {
3630
                        t1=taxonName.getTitleCache();
3631
                        if (author1!=null && !StringUtils.isEmpty(author1)) {
3632
                            t1=t1.split(Pattern.quote(author1))[0];
3633
                        }
3634
                    } catch (Exception e) {
3635
                        // TODO Auto-generated catch block
3636
                        e.printStackTrace();
3637
                    }
3638
                    try {
3639
                        t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
3640
                        if (author2!=null && !StringUtils.isEmpty(author2)) {
3641
                            t2=t2.split(Pattern.quote(author2))[0];
3642
                        }
3643
                    } catch (Exception e) {
3644
                        // TODO Auto-generated catch block
3645
                        e.printStackTrace();
3646
                    }
3647

    
3648
                    similarityScore=similarity(t1.trim(), t2.trim());
3649
                    //System.out.println("taxonscore "+similarityScore);
3650
                    similarityAuthor=similarity(author1.trim(), author2.trim());
3651
                    //System.out.println("authorscore "+similarityAuthor);
3652
                    insertAsExisting = compareAndCheckTaxon(taxonName, refMods, similarityScore, bestMatchingTaxon, similarityAuthor);
3653
                    if(insertAsExisting) {
3654
                        tmpTaxonBase=bestMatchingTaxon;
3655
                        break;
3656
                    }
3657
                }
3658
                if ( !insertAsExisting ){
3659
                    if(!state2.getConfig().doKeepOriginalSecundum()) {
3660
                        tmpTaxonBase.setSec(state2.getConfig().getSecundum());
3661
                    }
3662

    
3663
                    //                    tmptaxonbase.setSec(refMods);
3664
                    if (taxonName.getRank().equals(state2.getConfig().getMaxRank())) {
3665
                        //System.out.println("****************************"+tmptaxonbase);
3666
                        if (!isSynonym) {
3667
                            classification.addChildTaxon((Taxon)tmpTaxonBase, refMods, null);
3668
                        }
3669
                    } else{
3670
                        hierarchy = new HashMap<Rank, Taxon>();
3671
                        //System.out.println("LOOK FOR PARENT "+taxonname.toString()+", "+tmptaxonbase.toString());
3672
                        if (!isSynonym){
3673
                            lookForParentNode(taxonName,(Taxon)tmpTaxonBase, refMods,this);
3674
                            //System.out.println("HIERARCHY "+hierarchy);
3675
                            Taxon parent = buildHierarchy();
3676
                            if(!taxonExistsInClassification(parent,(Taxon)tmpTaxonBase)){
3677
                                if(parent !=null) {
3678
                                    classification.addParentChild(parent, (Taxon)tmpTaxonBase, refMods, null);
3679
                                } else {
3680
                                    classification.addChildTaxon((Taxon)tmpTaxonBase, refMods, null);
3681
                                }
3682
                                importer.getClassificationService().saveOrUpdate(classification);
3683
                            }
3684
                        }
3685
                        //                        Set<TaxonNode> nodeList = classification.getAllNodes();
3686
                        //                        for(TaxonNode tn:nodeList) {
3687
                        //                            System.out.println(tn.getTaxon());
3688
                        //                        }
3689
                    }
3690
                }
3691
                importer.getClassificationService().saveOrUpdate(classification);
3692
                 if(isSynonym) {
3693
                    try{
3694
                        Synonym castTest=CdmBase.deproxy(tmpTaxonBase, Synonym.class);
3695
                    }catch(Exception e){
3696
                        TaxonName existingTnb = tmpTaxonBase.getName();
3697
                        Synonym castTest = Synonym.NewInstance(existingTnb, refMods);
3698
                        importer.getTaxonService().saveOrUpdate(castTest);
3699
                        tmpTaxonBase=CdmBase.deproxy(castTest, Synonym.class);
3700
                    }
3701
                }
3702
            }
3703
            if(!isSynonym) {
3704
                taxon=CdmBase.deproxy(tmpTaxonBase, Taxon.class);
3705
            } else {
3706
                syno=CdmBase.deproxy(tmpTaxonBase, Synonym.class);
3707
            }
3708

    
3709
        }
3710

    
3711
		private boolean checkNodeNameReplaceable(Taxon nodeTaxon, TaxonBase<?> newTaxon) {
3712
			//TODO preliminary check
3713
			if (newTaxon.isInstanceOf(Synonym.class)){
3714
				return false;
3715
			}
3716
			INonViralName nodeName = nodeTaxon.getName();
3717
			INonViralName newName = newTaxon.getName();
3718
			if (nodeTaxon.getName() == null ||  newName == null){
3719
				return false;
3720
			}
3721
			if (nodeTaxon.getDescriptions().size() > 0 || nodeName.getDescriptions().size() > 0 || nodeName.getTypeDesignations().size() > 0 ){
3722
				return false;
3723
			}
3724
			boolean compare = true;
3725
			for (NomenclaturalStatus status : newName.getStatus() ){
3726
				compare &= compareStatus(nodeTaxon, status.getType());
3727
			}
3728
			if (! compare){
3729
				return false;
3730
			}
3731

    
3732
			if (nodeName.getNameCache() != null && nodeName.getNameCache().equals(newName.getNameCache())){
3733
				if (nodeName.getNameCache().equals(nodeName.getTitleCache())){
3734
					if (newName.getNameCache().length() < newName.getTitleCache().length()){
3735
						logger.warn("We still need to check, if node was automatically created via hierarchy creation: " + nodeName.getNameCache());
3736
						return true;
3737
					}
3738
				}
3739
			}
3740

    
3741
			return false;
3742
		}
3743

    
3744
		/**
3745
         *
3746
         */
3747
        private Taxon buildHierarchy() {
3748
            logger.info("buildHierarchy");
3749
            Taxon higherTaxon = null;
3750
            //add the maxRank as a root
3751
            if(hierarchy.containsKey(state2.getConfig().getMaxRank())){
3752
                Taxon ct=hierarchy.get(state2.getConfig().getMaxRank());
3753
                if(!taxonExistsInClassification(higherTaxon, ct)) {
3754
                   classification.addChildTaxon(ct, refMods, null);
3755
                }
3756
                higherTaxon = hierarchy.get(state2.getConfig().getMaxRank());
3757
                //                return higherTaxon;
3758
            }
3759
            //add the relation to the highertaxon, except if the current rank to add IS the maxRank
3760

    
3761
            //TODO higher Ranks
3762

    
3763
            if(hierarchy.containsKey(Rank.FAMILY()) && !state2.getConfig().getMaxRank().equals(Rank.FAMILY())){
3764
                higherTaxon=saveAndGetHigherTaxon(Rank.FAMILY(),higherTaxon);
3765
            }
3766
            if(hierarchy.containsKey(Rank.SUBFAMILY()) && !state2.getConfig().getMaxRank().equals(Rank.SUBFAMILY())){
3767
                higherTaxon=saveAndGetHigherTaxon(Rank.SUBFAMILY(),higherTaxon);
3768
            }
3769
            if(hierarchy.containsKey(Rank.TRIBE())&& !state2.getConfig().getMaxRank().equals(Rank.TRIBE())){
3770
                higherTaxon=saveAndGetHigherTaxon(Rank.TRIBE(),higherTaxon);
3771
            }
3772
            if(hierarchy.containsKey(Rank.SUBTRIBE())&& !state2.getConfig().getMaxRank().equals(Rank.SUBTRIBE())){
3773
                higherTaxon=saveAndGetHigherTaxon(Rank.SUBTRIBE(),higherTaxon);
3774
            }
3775
            if(hierarchy.containsKey(Rank.GENUS())&& !state2.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3776
                higherTaxon=saveAndGetHigherTaxon(Rank.GENUS(),higherTaxon);
3777
            }
3778
            if(hierarchy.containsKey(Rank.SUBGENUS())&& !state2.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3779
                higherTaxon=saveAndGetHigherTaxon(Rank.SUBGENUS(),higherTaxon);
3780
            }
3781
            importer.getClassificationService().saveOrUpdate(classification);
3782
            return higherTaxon;
3783
        }
3784

    
3785
        private Taxon saveAndGetHigherTaxon(Rank r, Taxon higherTaxon){
3786
            Taxon ct=hierarchy.get(r);
3787
            if(!taxonExistsInClassification(higherTaxon,ct )) {
3788
                if(higherTaxon != null && ct!=null) {
3789
                    classification.addParentChild(higherTaxon, ct, refMods, null);
3790
                } else
3791
                    if(higherTaxon == null && ct !=null) {
3792
                        classification.addChildTaxon(ct, refMods, null);
3793
                }
3794
            }
3795
            return ct;
3796
        }
3797

    
3798
        private boolean taxonExistsInClassification(Taxon parent, Taxon child){
3799
            logger.info("taxonExistsInClassification");
3800
            //            System.out.println("LOOK IF TAXA EXIST "+parent+", "+child);
3801
            boolean found=false;
3802
            if(parent !=null){
3803
                for (TaxonNode p : classification.getAllNodes()){
3804
                    if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
3805
                        for (TaxonNode c : p.getChildNodes()) {
3806
                            if (c.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3807
                                found=true;
3808
                                break;
3809
                            }
3810
                        }
3811
                    }
3812
                }
3813
            }
3814
            else{
3815
                for (TaxonNode p : classification.getAllNodes()){
3816
                    if(p.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3817
                        found=true;
3818
                        break;
3819
                    }
3820
                }
3821
            }
3822
            //            System.out.println("LOOK IF TAXA EXIST? "+found);
3823
            return found;
3824
        }
3825
        /**
3826
         * @param nameToBeFilledTest
3827
         */
3828
        public void setParsedName(TaxonName nameToBeFilledTest) {
3829
            this.taxonName = TaxonName.castAndDeproxy(nameToBeFilledTest);
3830

    
3831
        }
3832
        //variety dwcranks:varietyEpithet
3833
        /**
3834
         * @return the author
3835
         */
3836
        public String getAuthor() {
3837
            return author;
3838
        }
3839
        /**
3840
         * @return
3841
         */
3842
        public Taxon getTaxon() {
3843
            return taxon;
3844
        }
3845
        /**
3846
         * @return
3847
         */
3848
        public TaxonName getTaxonName() {
3849
            return taxonName;
3850
        }
3851

    
3852
        /**
3853
         * @param findOrCreateTaxon
3854
         */
3855
        public void setForm(Taxon form) {
3856
            this.form=form;
3857

    
3858
        }
3859
        /**
3860
         * @param findOrCreateTaxon
3861
         */
3862
        public void setVariety(Taxon variety) {
3863
            this.variety=variety;
3864

    
3865
        }
3866
        /**
3867
         * @param string
3868
         * @return
3869
         */
3870
        @SuppressWarnings("rawtypes")
3871
        public Taxon findOrCreateTaxon(String partialname,String fullname, Rank rank, Rank globalrank) {
3872
            logger.info("findOrCreateTaxon");
3873
            sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
3874
            //takes too much time
3875
            //            List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3876
            //            logger.info("tmpList returned: "+tmpList.size());
3877

    
3878
            NomenclaturalStatusType statusType = null;
3879
            if (!getStatus().isEmpty()){
3880
                try {
3881
                    statusType = nomStatusString2NomStatus(getStatus());
3882
                } catch (UnknownCdmTypeException e) {
3883
                    addProblematicStatusToFile(getStatus());
3884
                    logger.warn("Problem with status");
3885
                }
3886
            }
3887

    
3888
            List<TaxonBase> tmpListFiltered = new ArrayList<TaxonBase>();
3889

    
3890
            Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitleWithRestrictions(TaxonBase.class, fullname, MatchMode.BEGINNING, null, null, null, null, null);
3891

    
3892
            tmpListFiltered.addAll(taxontest.getRecords());
3893
            taxontest = importer.getTaxonService().findByTitleWithRestrictions(TaxonBase.class, partialname, MatchMode.BEGINNING, null, null, null, null, null);
3894
            tmpListFiltered.addAll(taxontest.getRecords());
3895

    
3896
            //logger.info("tmpListFiltered returned: "+tmpListFiltered.size());
3897

    
3898
            boolean nameCorrected=false;
3899
            if (fullname.indexOf(partialname)<0) {
3900
                nameCorrected=true;
3901
            }
3902

    
3903
            boolean foundIdentic=false;
3904
            Taxon tmp=null;
3905
            for (TaxonBase tmpb:tmpListFiltered){
3906
                if(tmpb !=null){
3907
                    TaxonName tnb =  tmpb.getName();
3908
                    Rank crank=null;
3909
                    if (tnb != null){
3910
                         if(globalrank.equals(rank) || (globalrank.isLower(Rank.SPECIES()) && rank.equals(Rank.SPECIES()))){
3911
                            if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(fullname) ){
3912
                                crank =tnb.getRank();
3913
                                if (crank !=null && rank !=null){
3914
                                    if (crank.equals(rank)){
3915
                                        foundIdentic=true;
3916
                                        try{
3917
                                            tmp=(Taxon)tmpb;
3918
                                            break;
3919
                                        }catch(Exception e){
3920
                                            e.printStackTrace();
3921
                                        }
3922
                                    }
3923
                                }
3924
                            }
3925
                            if(nameCorrected){ //for corrected names such as Anochetus -- A. blf-pat
3926
                                if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
3927
                                    crank =tnb.getRank();
3928
                                    if (crank !=null && rank !=null){
3929
                                        if (crank.equals(rank)){
3930
                                            foundIdentic=true;
3931
                                            try{
3932
                                                tmp=(Taxon)tmpb;
3933
                                                break;
3934
                                            }catch(Exception e){
3935
                                                e.printStackTrace();
3936
                                            }
3937
                                        }
3938
                                    }
3939
                                }
3940
                            }
3941
                        }
3942
                        else{
3943
                            if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
3944
                                crank =tnb.getRank();
3945
                                if (crank !=null && rank !=null){
3946
                                    if (crank.equals(rank)){
3947
                                        foundIdentic=true;
3948
                                        try{
3949
                                            tmp=(Taxon)tmpb;
3950
                                            break;
3951
                                        }catch(Exception e){
3952
                                            e.printStackTrace();
3953
                                        }
3954
                                    }
3955
                                }
3956
                            }
3957
                        }
3958
                    }
3959
                }
3960
            }
3961
            boolean statusMatch=false;
3962
            boolean appendedMatch=false;
3963
            if(tmp !=null && foundIdentic){
3964
                statusMatch=compareStatus(tmp, statusType);
3965
                if (!getStatus().isEmpty() && ! (tmp.getAppendedPhrase() == null)) {
3966
                    appendedMatch=tmp.getAppendedPhrase().equals(getStatus());
3967
                }
3968
                if (getStatus().isEmpty() && tmp.getAppendedPhrase() == null) {
3969
                    appendedMatch=true;
3970
                }
3971

    
3972
            }
3973
            if ((tmp == null || !foundIdentic) ||  (tmp != null && !statusMatch) ||  (tmp != null && !appendedMatch && !statusMatch)){
3974

    
3975
                INonViralName tnb = getNonViralNameAccNomenclature();
3976
                tnb.setRank(rank);
3977

    
3978
                if(statusType != null) {
3979
                    tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3980
                }
3981
                if(StringUtils.isNotBlank(getStatus())) {
3982
                    tnb.setAppendedPhrase(getStatus());
3983
                }
3984

    
3985
                if(rank.equals(Rank.UNKNOWN_RANK())){
3986
                    tnb.setTitleCache(fullname, true);
3987
                    //                    tnb.setGenusOrUninomial(fullname);
3988
                }
3989
                if(rank.isHigher(Rank.GENUS())) {
3990
                    tnb.setGenusOrUninomial(partialname);
3991
                }
3992

    
3993
                if(rank.isHigher(Rank.SPECIES())) {
3994
                    tnb.setTitleCache(partialname, true);
3995
                }
3996

    
3997
                if (rank.equals(globalrank) && author != null) {
3998

    
3999
                    tnb.setCombinationAuthorship(findOrCreateAuthor(author));
4000
                    if (getIdentifier() !=null && !getIdentifier().isEmpty()){
4001
                        Taxon taxonLSID = getTaxonByLSID(getIdentifier());
4002
                        if (taxonLSID !=null) {
4003
                            tmp=taxonLSID;
4004
                        }
4005
                    }
4006
                }
4007

    
4008
                if(tmp == null){
4009
                    if (rank.equals(Rank.FAMILY())) {
4010
                        tmp = buildFamily(tnb);
4011
                    }
4012
                    if (rank.equals(Rank.SUBFAMILY())) {
4013
                        tmp = buildSubfamily(tnb);
4014
                    }
4015
                    if (rank.equals(Rank.TRIBE())) {
4016
                        tmp = buildTribe(tnb);
4017
                    }
4018
                    if (rank.equals(Rank.SUBTRIBE())) {
4019
                        tmp = buildSubtribe(tnb);
4020
                    }
4021
                    if (rank.equals(Rank.GENUS())) {
4022
                        tmp = buildGenus(partialname, tnb);
4023
                    }
4024

    
4025
                    if (rank.equals(Rank.SUBGENUS())) {
4026
                        tmp = buildSubgenus(partialname, tnb);
4027
                    }
4028
                    if (rank.equals(Rank.SPECIES())) {
4029
                        tmp = buildSpecies(partialname, tnb);
4030
                    }
4031

    
4032
                    if (rank.equals(Rank.SUBSPECIES())) {
4033
                        tmp = buildSubspecies(partialname, tnb);
4034
                    }
4035

    
4036
                    if (rank.equals(Rank.VARIETY())) {
4037
                        tmp = buildVariety(fullname, partialname, tnb);
4038
                    }
4039

    
4040
                    if (rank.equals(Rank.FORM())) {
4041
                        tmp = buildForm(fullname, partialname, tnb);
4042
                    }
4043
                    if (tmp != null){
4044
                    	TaxonXTreatmentExtractor.this.sourceHandler.addSource(refMods, tmp);
4045
                    }
4046

    
4047
                    importer.getClassificationService().saveOrUpdate(classification);
4048
                }
4049

    
4050
            }
4051

    
4052
            tmp = CdmBase.deproxy(tmp, Taxon.class);
4053
            if (rank.equals(globalrank) && author != null) {
4054
                if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
4055
                    setLSID(getIdentifier(), tmp);
4056
                    importer.getTaxonService().saveOrUpdate(tmp);
4057
                    tmp = CdmBase.deproxy(tmp, Taxon.class);
4058
                }
4059
            }
4060

    
4061
            this.taxon=tmp;
4062

    
4063
            return tmp;
4064
        }
4065

    
4066
        /**
4067
         * @param tnb
4068
         * @return
4069
         */
4070
        private Taxon buildSubfamily(INonViralName tnb) {
4071
            Taxon tmp;
4072
            //            tnb.generateTitle();
4073
            tmp = findMatchingTaxon(tnb,refMods);
4074
            if(tmp ==null){
4075
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4076
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4077
                    tmp.setSec(state2.getConfig().getSecundum());
4078
                }
4079
                //                tmp.setSec(refMods);
4080
                //                sourceHandler.addSource(refMods, tmp);
4081
                if(family != null) {
4082
                    classification.addParentChild(family, tmp, null, null);
4083
                    higherRank=Rank.FAMILY();
4084
                    higherTaxa=family;
4085
                } else {
4086
                    //System.out.println("ADDCHILDTAXON SUBFAMILY "+tmp);
4087
                    classification.addChildTaxon(tmp, null, null);
4088
                }
4089
            }
4090
            return tmp;
4091
        }
4092
        /**
4093
         * @param tnb
4094
         * @return
4095
         */
4096
        private Taxon buildFamily(INonViralName tnb) {
4097
            Taxon tmp;
4098
            //            tnb.generateTitle();
4099
            tmp = findMatchingTaxon(tnb,refMods);
4100
            if(tmp ==null){
4101
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4102
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4103
                    tmp.setSec(state2.getConfig().getSecundum());
4104
                }
4105
                //                tmp.setSec(refMods);
4106
                //sourceHandler.addSource(refMods, tmp);
4107
                //System.out.println("ADDCHILDTAXON FAMILY "+tmp);
4108
                classification.addChildTaxon(tmp, null, null);
4109
            }
4110
            return tmp;
4111
        }
4112
        /**
4113
         * @param fullname
4114
         * @param tnb
4115
         * @return
4116
         */
4117
        private Taxon buildForm(String fullname, String partialname, INonViralName tnb) {
4118
            if (genusName !=null) {
4119
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4120
            }
4121
            if (subgenusName !=null) {
4122
                tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4123
            }
4124
            if(speciesName !=null) {
4125
                tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4126
            }
4127
            if(subspeciesName != null) {
4128
                tnb.setInfraSpecificEpithet(subspeciesName.getInfraSpecificEpithet());
4129
            }
4130
            if(partialname!= null) {
4131
                tnb.setInfraSpecificEpithet(partialname);
4132
            }
4133
             //TODO how to save form??
4134
            tnb.setTitleCache(fullname, true);
4135
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4136
            if(tmp ==null){
4137
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4138
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4139
                    tmp.setSec(state2.getConfig().getSecundum());
4140
                }
4141
                //                tmp.setSec(refMods);
4142
                //sourceHandler.addSource(refMods, tmp);
4143
                if (subspecies !=null) {
4144
                    classification.addParentChild(subspecies, tmp, null, null);
4145
                    higherRank=Rank.SUBSPECIES();
4146
                    higherTaxa=subspecies;
4147
                } else {
4148
                    if (species !=null) {
4149
                        classification.addParentChild(species, tmp, null, null);
4150
                        higherRank=Rank.SPECIES();
4151
                        higherTaxa=species;
4152
                    }
4153
                    else{
4154
                        //                        System.out.println("ADDCHILDTAXON FORM "+tmp);
4155
                        classification.addChildTaxon(tmp, null, null);
4156
                    }
4157
                }
4158
            }
4159
            return tmp;
4160
        }
4161
        /**
4162
         * @param fullname
4163
         * @param tnb
4164
         * @return
4165
         */
4166
        private Taxon buildVariety(String fullname, String partialname, INonViralName tnb) {
4167
            Taxon tmp;
4168
            if (genusName !=null) {
4169
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4170
            }
4171
            if (subgenusName !=null) {
4172
                tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4173
            }
4174
            if(speciesName !=null) {
4175
                tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4176
            }
4177
            if(subspeciesName != null) {
4178
                tnb.setInfraSpecificEpithet(subspeciesName.getSpecificEpithet());
4179
            }
4180
            if(partialname != null) {
4181
                tnb.setInfraSpecificEpithet(partialname);
4182
            }
4183
            //TODO how to save variety?
4184
            tnb.setTitleCache(fullname, true);
4185
            tmp = findMatchingTaxon(tnb,refMods);
4186
            if(tmp ==null){
4187
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4188
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4189
                    tmp.setSec(state2.getConfig().getSecundum());
4190
                }
4191
                //                tmp.setSec(refMods);
4192
                //sourceHandler.addSource(refMods, tmp);
4193
                if (subspecies !=null) {
4194
                    classification.addParentChild(subspecies, tmp, null, null);
4195
                    higherRank=Rank.SUBSPECIES();
4196
                    higherTaxa=subspecies;
4197
                } else {
4198
                    if(species !=null) {
4199
                        classification.addParentChild(species, tmp, null, null);
4200
                        higherRank=Rank.SPECIES();
4201
                        higherTaxa=species;
4202
                    }
4203
                    else{
4204
                        //System.out.println("ADDCHILDTAXON VARIETY "+tmp);
4205
                        classification.addChildTaxon(tmp, null, null);
4206
                    }
4207
                }
4208
            }
4209
            return tmp;
4210
        }
4211
        /**
4212
         * @param partialname
4213
         * @param tnb
4214
         * @return
4215
         */
4216
        private Taxon buildSubspecies(String partialname, INonViralName tnb) {
4217
            if (genusName !=null) {
4218
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4219
            }
4220
            if (subgenusName !=null) {
4221
                //                            System.out.println("SUB:"+subgenusName.getInfraGenericEpithet());
4222
                tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4223
            }
4224
            if(speciesName !=null) {
4225
                //                            System.out.println("SPE:"+speciesName.getSpecificEpithet());
4226
                tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4227
            }
4228
            tnb.setInfraSpecificEpithet(partialname);
4229
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4230
            if(tmp ==null){
4231
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4232
                if(!state2.getConfig().doKeepOriginalSecundum())
4233
                 {
4234
                    tmp.setSec(state2.getConfig().getSecundum());
4235
                //                tmp.setSec(refMods);
4236
                //sourceHandler.addSource(refMods, tmp);
4237
                }
4238

    
4239
                if(species != null) {
4240
                    classification.addParentChild(species, tmp, null, null);
4241
                    higherRank=Rank.SPECIES();
4242
                    higherTaxa=species;
4243
                }
4244
                else{
4245
                    //System.out.println("ADDCHILDTAXON SUBSPECIES "+tmp);
4246
                    classification.addChildTaxon(tmp, null, null);
4247
                }
4248
            }
4249
            return tmp;
4250
        }
4251
        /**
4252
         * @param partialname
4253
         * @param tnb
4254
         * @return
4255
         */
4256
        private Taxon buildSpecies(String partialname, INonViralName tnb) {
4257
            if (genusName !=null) {
4258
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4259
            }
4260
            if (subgenusName !=null) {
4261
                tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4262
            }
4263
            tnb.setSpecificEpithet(partialname.toLowerCase());
4264
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4265
            if(tmp ==null){
4266
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4267
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4268
                    tmp.setSec(state2.getConfig().getSecundum());
4269
                }
4270
                //                tmp.setSec(refMods);
4271
                //sourceHandler.addSource(refMods, tmp);
4272
                if (subgenus !=null) {
4273
                    classification.addParentChild(subgenus, tmp, null, null);
4274
                    higherRank=Rank.SUBGENUS();
4275
                    higherTaxa=subgenus;
4276
                } else {
4277
                    if (genus !=null) {
4278
                        classification.addParentChild(genus, tmp, null, null);
4279
                        higherRank=Rank.GENUS();
4280
                        higherTaxa=genus;
4281
                    }
4282
                    else{
4283
                        //System.out.println("ADDCHILDTAXON SPECIES "+tmp);
4284
                        classification.addChildTaxon(tmp, null, null);
4285
                    }
4286
                }
4287
            }
4288
            return tmp;
4289
        }
4290
        /**
4291
         * @param partialname
4292
         * @param tnb
4293
         * @return
4294
         */
4295
        private Taxon buildSubgenus(String partialname, INonViralName tnb) {
4296
            tnb.setInfraGenericEpithet(partialname);
4297
            if (genusName !=null) {
4298
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4299
            }
4300
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4301
            if(tmp ==null){
4302
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4303
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4304
                    tmp.setSec(state2.getConfig().getSecundum());
4305
                }
4306
                //                tmp.setSec(refMods);
4307
                //sourceHandler.addSource(refMods, tmp);
4308
                if(genus != null) {
4309
                    classification.addParentChild(genus, tmp, null, null);
4310
                    higherRank=Rank.GENUS();
4311
                    higherTaxa=genus;
4312
                } else{
4313
                    //System.out.println("ADDCHILDTAXON SUBGENUS "+tmp);
4314
                    classification.addChildTaxon(tmp, null, null);
4315
                }
4316
            }
4317
            return tmp;
4318
        }
4319
        /**
4320
         * @param partialname
4321
         * @param tnb
4322
         * @return
4323
         */
4324
        private Taxon buildGenus(String partialname, INonViralName tnb) {
4325
            Taxon tmp;
4326
            tnb.setGenusOrUninomial(partialname);
4327

    
4328

    
4329
            tmp = findMatchingTaxon(tnb,refMods);
4330
            if(tmp ==null){
4331
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4332
                if(!state2.getConfig().doKeepOriginalSecundum())
4333
                 {
4334
                    tmp.setSec(state2.getConfig().getSecundum());
4335
                //                tmp.setSec(refMods);
4336
                //sourceHandler.addSource(refMods, tmp);
4337
                }
4338

    
4339
                if(subtribe != null) {
4340
                    classification.addParentChild(subtribe, tmp, null, null);
4341
                    higherRank=Rank.SUBTRIBE();
4342
                    higherTaxa=subtribe;
4343
                } else{
4344
                    if(tribe !=null) {
4345
                        classification.addParentChild(tribe, tmp, null, null);
4346
                        higherRank=Rank.TRIBE();
4347
                        higherTaxa=tribe;
4348
                    } else{
4349
                        if(subfamily !=null) {
4350
                            classification.addParentChild(subfamily, tmp, null, null);
4351
                            higherRank=Rank.SUBFAMILY();
4352
                            higherTaxa=subfamily;
4353
                        } else
4354
                            if(family !=null) {
4355
                                classification.addParentChild(family, tmp, null, null);
4356
                                higherRank=Rank.FAMILY();
4357
                                higherTaxa=family;
4358
                            }
4359
                            else{
4360
                                //System.out.println("ADDCHILDTAXON GENUS "+tmp);
4361
                                classification.addChildTaxon(tmp, null, null);
4362
                            }
4363
                    }
4364
                }
4365
            }
4366
            return tmp;
4367
        }
4368

    
4369
        /**
4370
         * @param tnb
4371
         * @return
4372
         */
4373
        private Taxon buildSubtribe(INonViralName tnb) {
4374
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4375
            if(tmp==null){
4376
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4377
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4378
                    tmp.setSec(state2.getConfig().getSecundum());
4379
                }
4380
                //                tmp.setSec(refMods);
4381
                //sourceHandler.addSource(refMods, tmp);
4382
                if(tribe != null) {
4383
                    classification.addParentChild(tribe, tmp, null, null);
4384
                    higherRank=Rank.TRIBE();
4385
                    higherTaxa=tribe;
4386
                } else{
4387
                    //System.out.println("ADDCHILDTAXON SUBTRIBE "+tmp);
4388
                    classification.addChildTaxon(tmp, null, null);
4389
                }
4390
            }
4391
            return tmp;
4392
        }
4393
        /**
4394
         * @param tnb
4395
         * @return
4396
         */
4397
        private Taxon buildTribe(INonViralName tnb) {
4398
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4399
            if(tmp==null){
4400
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4401
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4402
                    tmp.setSec(state2.getConfig().getSecundum());
4403
                }
4404
                //                tmp.setSec(refMods);
4405
                //sourceHandler.addSource(refMods, tmp);
4406
                if (subfamily !=null) {
4407
                    classification.addParentChild(subfamily, tmp, null, null);
4408
                    higherRank=Rank.SUBFAMILY();
4409
                    higherTaxa=subfamily;
4410
                } else {
4411
                    if(family != null) {
4412
                        classification.addParentChild(family, tmp, null, null);
4413
                        higherRank=Rank.FAMILY();
4414
                        higherTaxa=family;
4415
                    }
4416
                    else{
4417
                        //System.out.println("ADDCHILDTAXON TRIBE "+tmp);
4418
                        classification.addChildTaxon(tmp, null, null);
4419
                    }
4420
                }
4421
            }
4422
            return tmp;
4423
        }
4424

    
4425
        /**
4426
         * @param identifier2
4427
         * @return
4428
         */
4429
        @SuppressWarnings("rawtypes")
4430
        private Taxon getTaxonByLSID(String identifier) {
4431
            //logger.info("getTaxonByLSID");
4432
            //            boolean lsidok=false;
4433
            String id = identifier.split("__")[0];
4434
            //            String source = identifier.split("__")[1];
4435
            LSID lsid = null;
4436
            if (id.indexOf("lsid")>-1){
4437
                try {
4438
                    lsid = new LSID(id);
4439
                    //                    lsidok=true;
4440
                } catch (MalformedLSIDException e) {
4441
                    logger.warn("Malformed LSID");
4442
                }
4443
            }
4444
            if (lsid !=null){
4445
                List<Taxon> taxa = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
4446
                LSID currentlsid=null;
4447
                for (Taxon t:taxa){
4448
                    currentlsid = t.getLsid();
4449
                    if (currentlsid !=null){
4450
                        if (currentlsid.getLsid().equals(lsid.getLsid())){
4451
                            try{
4452
                                return t;
4453
                            }
4454
                            catch(Exception e){logger.warn("Exception occurred while comparing LSIDs "+e );}
4455
                        }
4456
                    }
4457
                }
4458
            }
4459
            return null;
4460
        }
4461
        /**
4462
         * @param author2
4463
         * @return
4464
         */
4465
        @SuppressWarnings("rawtypes")
4466
        private Person findOrCreateAuthor(String author2) {
4467
            //logger.info("findOrCreateAuthor");
4468
            List<UuidAndTitleCache<Person>> hiberPersons = importer.getAgentService().getPersonUuidAndTitleCache();
4469
            for (UuidAndTitleCache<Person> hibernateP:hiberPersons){
4470
                if(hibernateP.getTitleCache().equals(author2)) {
4471
                    AgentBase existing = importer.getAgentService().find(hibernateP.getUuid());
4472
                    return CdmBase.deproxy(existing, Person.class);
4473
                }
4474
            }
4475
            Person p = Person.NewInstance();
4476
            p.setTitleCache(author2,true);
4477
            importer.getAgentService().saveOrUpdate(p);
4478
            return CdmBase.deproxy(p, Person.class);
4479
        }
4480
        /**
4481
         * @param author the author to set
4482
         */
4483
        public void setAuthor(String author) {
4484
            this.author = author;
4485
        }
4486

    
4487
        /**
4488
         * @return the higherTaxa
4489
         */
4490
        public Taxon getHigherTaxa() {
4491
            return higherTaxa;
4492
        }
4493
        /**
4494
         * @param higherTaxa the higherTaxa to set
4495
         */
4496
        public void setHigherTaxa(Taxon higherTaxa) {
4497
            this.higherTaxa = higherTaxa;
4498
        }
4499
        /**
4500
         * @return the higherRank
4501
         */
4502
        public Rank getHigherRank() {
4503
            return higherRank;
4504
        }
4505
        /**
4506
         * @param higherRank the higherRank to set
4507
         */
4508
        public void setHigherRank(Rank higherRank) {
4509
            this.higherRank = higherRank;
4510
        }
4511
        public String getName(){
4512
            if (newName.isEmpty()) {
4513
                return originalName;
4514
            } else {
4515
                return newName;
4516
            }
4517

    
4518
        }
4519
        /**
4520
         * @return the fullName
4521
         */
4522
        public String getOriginalName() {
4523
            return originalName;
4524
        }
4525
        /**
4526
         * @param fullName the fullName to set
4527
         */
4528
        public void setOriginalName(String fullName) {
4529
            this.originalName = fullName;
4530
        }
4531
        /**
4532
         * @return the newName
4533
         */
4534
        public String getNewName() {
4535
            return newName;
4536
        }
4537
        /**
4538
         * @param newName the newName to set
4539
         */
4540
        public void setNewName(String newName) {
4541
            this.newName = newName;
4542
        }
4543
        /**
4544
         * @return the rank
4545
         */
4546
        public Rank getRank() {
4547
            return rank;
4548
        }
4549
        /**
4550
         * @param rank the rank to set
4551
         */
4552
        public void setRank(Rank rank) {
4553
            this.rank = rank;
4554
        }
4555
        /**
4556
         * @return the idenfitiger
4557
         */
4558
        public String getIdentifier() {
4559
            return identifier;
4560
        }
4561
        /**
4562
         * @param idenfitiger the idenfitiger to set
4563
         */
4564
        public void setIdentifier(String identifier) {
4565
            this.identifier = identifier;
4566
        }
4567
        /**
4568
         * @return the status
4569
         */
4570
        public String getStatus() {
4571
            if (status == null) {
4572
                return "";
4573
            }
4574
            return status;
4575
        }
4576
        /**
4577
         * @param status the status to set
4578
         */
4579
        public void setStatus(String status) {
4580
            this.status = status;
4581
        }
4582
        /**
4583
         * @return the family
4584
         */
4585
        public Taxon getFamily() {
4586
            return family;
4587
        }
4588
        /**
4589
         * @param family the family to set
4590
         */
4591
        @SuppressWarnings("rawtypes")
4592
        public void setFamily(Taxon family) {
4593
            this.family = family;
4594
            familyName = CdmBase.deproxy(family.getName());
4595
        }
4596
        /**
4597
         * @return the subfamily
4598
         */
4599
        public Taxon getSubfamily() {
4600
            return subfamily;
4601
        }
4602
        /**
4603
         * @param subfamily the subfamily to set
4604
         */
4605
        @SuppressWarnings("rawtypes")
4606
        public void setSubfamily(Taxon subfamily) {
4607
            this.subfamily = subfamily;
4608
            subfamilyName = CdmBase.deproxy(subfamily.getName());
4609
        }
4610
        /**
4611
         * @return the tribe
4612
         */
4613
        public Taxon getTribe() {
4614
            return tribe;
4615
        }
4616
        /**
4617
         * @param tribe the tribe to set
4618
         */
4619
        @SuppressWarnings("rawtypes")
4620
        public void setTribe(Taxon tribe) {
4621
            this.tribe = tribe;
4622
            tribeName = CdmBase.deproxy(tribe.getName());
4623
        }
4624
        /**
4625
         * @return the subtribe
4626
         */
4627
        public Taxon getSubtribe() {
4628
            return subtribe;
4629
        }
4630
        /**
4631
         * @param subtribe the subtribe to set
4632
         */
4633
        @SuppressWarnings("rawtypes")
4634
        public void setSubtribe(Taxon subtribe) {
4635
            this.subtribe = subtribe;
4636
            subtribeName =CdmBase.deproxy(subtribe.getName());
4637
        }
4638
        /**
4639
         * @return the genus
4640
         */
4641
        public Taxon getGenus() {
4642
            return genus;
4643
        }
4644
        /**
4645
         * @param genus the genus to set
4646
         */
4647
        @SuppressWarnings("rawtypes")
4648
        public void setGenus(Taxon genus) {
4649
            if (genus != null){
4650
	        	this.genus = genus;
4651
	            genusName = CdmBase.deproxy(genus.getName());
4652
            }
4653
        }
4654
        /**
4655
         * @return the subgenus
4656
         */
4657
        public Taxon getSubgenus() {
4658
            return subgenus;
4659
        }
4660
        /**
4661
         * @param subgenus the subgenus to set
4662
         */
4663
        @SuppressWarnings("rawtypes")
4664
        public void setSubgenus(Taxon subgenus) {
4665
            this.subgenus = subgenus;
4666
            subgenusName = CdmBase.deproxy(subgenus.getName());
4667
        }
4668
        /**
4669
         * @return the species
4670
         */
4671
        public Taxon getSpecies() {
4672
            return species;
4673
        }
4674
        /**
4675
         * @param species the species to set
4676
         */
4677
        public void setSpecies(Taxon species) {
4678
        	if (species != null){
4679
	            this.species = species;
4680
	            speciesName = CdmBase.deproxy(species.getName());
4681
        	}
4682
        }
4683
        /**
4684
         * @return the subspecies
4685
         */
4686
        public Taxon getSubspecies() {
4687
            return subspecies;
4688
        }
4689
        /**
4690
         * @param subspecies the subspecies to set
4691
         */
4692
        @SuppressWarnings("rawtypes")
4693
        public void setSubspecies(Taxon subspecies) {
4694
            this.subspecies = subspecies;
4695
            subspeciesName = CdmBase.deproxy(subspecies.getName());
4696

    
4697
        }
4698

    
4699

    
4700

    
4701
    }
4702

    
4703

    
4704
    /**
4705
     * @param status
4706
     */
4707
    private void addProblematicStatusToFile(String status) {
4708
        try{
4709
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "StatusUnknown_"+classification.getTitleCache()+".txt",true);
4710
            BufferedWriter out = new BufferedWriter(fstream);
4711
            out.write(status+"\n");
4712
            //Close the output stream
4713
            out.close();
4714
        }catch (Exception e){//Catch exception if any
4715
            System.err.println("Error: " + e.getMessage());
4716
        }
4717

    
4718
    }
4719

    
4720

    
4721

    
4722
    /**
4723
     * @param tnb
4724
     * @return
4725
     */
4726
    private Taxon findMatchingTaxon(INonViralName tnb, Reference refMods) {
4727
        logger.info("findMatchingTaxon");
4728
        Taxon tmp=null;
4729

    
4730
        refMods=CdmBase.deproxy(refMods, Reference.class);
4731
        boolean insertAsExisting =false;
4732
        List<Taxon> existingTaxa = new ArrayList<Taxon>();
4733
        try {
4734
            existingTaxa = getMatchingTaxa(TaxonName.castAndDeproxy(tnb));
4735
        } catch (Exception e1) {
4736
            // TODO Auto-generated catch block
4737
            e1.printStackTrace();
4738
        }
4739
        double similarityScore=0.0;
4740
        double similarityAuthor=-1;
4741
        String author1="";
4742
        String author2="";
4743
        String t1="";
4744
        String t2="";
4745
        for (Taxon bestMatchingTaxon : existingTaxa){
4746
            if (!existingTaxa.isEmpty() && state2.getConfig().isInteractWithUser() && !insertAsExisting) {
4747
                //                System.out.println("tnb "+tnb.getTitleCache());
4748
                //                System.out.println("ext "+bestMatchingTaxon.getTitleCache());
4749
                try {
4750
                    if(tnb.getAuthorshipCache()!=null) {
4751
                        author1=tnb.getAuthorshipCache();
4752
                    }
4753
                } catch (Exception e) {
4754
                    // TODO Auto-generated catch block
4755
                    e.printStackTrace();
4756
                }
4757
                try {
4758
                    if(bestMatchingTaxon.getName().getAuthorshipCache()!=null) {
4759
                        author2=bestMatchingTaxon.getName().getAuthorshipCache();
4760
                    }
4761
                } catch (Exception e) {
4762
                    // TODO Auto-generated catch block
4763
                    e.printStackTrace();
4764
                }
4765
                try {
4766
                    t1=tnb.getTitleCache().split("sec.")[0].trim();
4767
                    if (author1!=null && !StringUtils.isEmpty(author1)) {
4768
                        t1=t1.split(Pattern.quote(author1))[0];
4769
                    }
4770
                } catch (Exception e) {
4771
                    // TODO Auto-generated catch block
4772
                    e.printStackTrace();
4773
                }
4774
                try {
4775
                    t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
4776
                    if (author2!=null && !StringUtils.isEmpty(author2)) {
4777
                        t2=t2.split(Pattern.quote(author2))[0];
4778
                    }
4779
                } catch (Exception e) {
4780
                    // TODO Auto-generated catch block
4781
                    e.printStackTrace();
4782
                }
4783
                similarityScore=similarity(t1.trim(), t2.trim());
4784
                //                System.out.println("taxascore: "+similarityScore);
4785
                similarityAuthor=similarity(author1.trim(), author2.trim());
4786
                //                System.out.println("authorscore: "+similarityAuthor);
4787
                insertAsExisting = compareAndCheckTaxon(tnb, refMods, similarityScore, bestMatchingTaxon,similarityAuthor);
4788
            }
4789
            if(insertAsExisting) {
4790
                //System.out.println("KEEP "+bestMatchingTaxon.toString());
4791
                tmp=bestMatchingTaxon;
4792
                sourceHandler.addSource(refMods, tmp);
4793
                return tmp;
4794
            }
4795
        }
4796
        return tmp;
4797
    }
4798

    
4799

    
4800
    /**
4801
     * @param tnb
4802
     * @param refMods
4803
     * @param similarityScore
4804
     * @param bestMatchingTaxon
4805
     * @param similarityAuthor
4806
     * @return
4807
     */
4808
    private boolean compareAndCheckTaxon(INonViralName tnb, Reference refMods, double similarityScore,
4809
            Taxon bestMatchingTaxon, double similarityAuthor) {
4810
        //logger.info("compareAndCheckTaxon");
4811
        boolean insertAsExisting;
4812
        //        if (tnb.getTitleCache().split("sec.")[0].equalsIgnoreCase("Chenopodium") && bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium album")>-1) {
4813
        //            insertAsExisting=false;
4814
        //        } else{
4815
        //a small hack/automatisation for Chenopodium only
4816
        if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase("Chenopodium") &&
4817
                bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium L.")>-1) {
4818
            insertAsExisting=true;
4819
        } else {
4820
            insertAsExisting=askIfReuseBestMatchingTaxon(tnb, bestMatchingTaxon, refMods, similarityScore,similarityAuthor);
4821
        }
4822
        //        }
4823

    
4824
        logDecision(tnb, bestMatchingTaxon, insertAsExisting, refMods);
4825
        return insertAsExisting;
4826
    }
4827

    
4828
    /**
4829
     * @return
4830
     */
4831
    @SuppressWarnings("rawtypes")
4832
    private List<Taxon> getMatchingTaxa(TaxonName tnb) {
4833
        //logger.info("getMatchingTaxon");
4834
    	if (tnb.getTitleCache() == null){
4835
    		tnb.setTitleCache(tnb.toString(), tnb.isProtectedTitleCache());
4836
    	}
4837

    
4838
        Pager<TaxonBase> pager=importer.getTaxonService().findByTitleWithRestrictions(TaxonBase.class, tnb.getTitleCache().split("sec.")[0].trim(), MatchMode.BEGINNING, null, null, null, null, null);
4839
        List<TaxonBase>records = pager.getRecords();
4840

    
4841
        List<Taxon> existingTaxons = new ArrayList<Taxon>();
4842
        for (TaxonBase r:records){
4843
            try{
4844
                Taxon bestMatchingTaxon = (Taxon)r;
4845
                //                System.out.println("best: "+bestMatchingTaxon.getTitleCache());
4846
                if(compareTaxonNameLength(bestMatchingTaxon.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4847
                    existingTaxons.add(bestMatchingTaxon);
4848
                }
4849
            }catch(ClassCastException e){logger.warn("classcast exception, might be a synonym, ignore it");}
4850
        }
4851
        Taxon bmt = importer.getTaxonService().findBestMatchingTaxon(tnb.getTitleCache());
4852
        if (!existingTaxons.contains(bmt) && bmt!=null) {
4853
            if(compareTaxonNameLength(bmt.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4854
                existingTaxons.add(bmt);
4855
            }
4856
        }
4857
        return existingTaxons;
4858
    }
4859

    
4860
    /**
4861
     * Check if the found Taxon can reasonnably be the same
4862
     * example: with and without author should match, but the subspecies should not be suggested for a genus
4863
     * */
4864
    private boolean compareTaxonNameLength(String f, String o){
4865
        //logger.info("compareTaxonNameLength");
4866
        boolean lengthOk=false;
4867
        int sizeF = f.length();
4868
        int sizeO = o.length();
4869
        if (sizeO>=sizeF) {
4870
            lengthOk=true;
4871
        }
4872
        if(sizeF>sizeO) {
4873
            if (sizeF-sizeO>10) {
4874
                lengthOk=false;
4875
            } else {
4876
                lengthOk=true;
4877
            }
4878
        }
4879

    
4880
        //        System.out.println(lengthOk+": compare "+f+" ("+f.length()+") and "+o+" ("+o.length()+")");
4881
        return lengthOk;
4882
    }
4883

    
4884
    private double similarity(String s1, String s2) {
4885
        //logger.info("similarity");
4886
        //System.out.println("similarity *"+s1+"* vs. *"+s2+"*");
4887
        if(!StringUtils.isEmpty(s1) && !StringUtils.isEmpty(s2)){
4888
            String l1=s1.toLowerCase().trim();
4889
            String l2=s2.toLowerCase().trim();
4890
            if (l1.length() < l2.length()) { // s1 should always be bigger
4891
                String swap = l1; l1 = l2; l2 = swap;
4892
            }
4893
            int bigLen = l1.length();
4894
            if (bigLen == 0) { return 1.0; /* both strings are zero length */ }
4895
            return (bigLen - computeEditDistance(l1, l2)) / (double) bigLen;
4896
        }
4897
        else{
4898
            if(s1!=null && s2!=null){
4899
                if (s1.equalsIgnoreCase(s2)) {
4900
                    return 1;
4901
                }
4902
            }
4903
            return -1;
4904
        }
4905
    }
4906

    
4907
    private int computeEditDistance(String s1, String s2) {
4908
        //logger.info("computeEditDistance");
4909
        int[] costs = new int[s2.length() + 1];
4910
        for (int i = 0; i <= s1.length(); i++) {
4911
            int lastValue = i;
4912
            for (int j = 0; j <= s2.length(); j++) {
4913
                if (i == 0) {
4914
                    costs[j] = j;
4915
                } else {
4916
                    if (j > 0) {
4917
                        int newValue = costs[j - 1];
4918
                        if (s1.charAt(i - 1) != s2.charAt(j - 1)) {
4919
                            newValue = Math.min(Math.min(newValue, lastValue),
4920
                                    costs[j]) + 1;
4921
                        }
4922
                        costs[j - 1] = lastValue;
4923
                        lastValue = newValue;
4924
                    }
4925
                }
4926
            }
4927
            if (i > 0) {
4928
                costs[s2.length()] = lastValue;
4929
            }
4930
        }
4931
        return costs[s2.length()];
4932
    }
4933

    
4934
    // Parent taxa resolved by the handle*Hierarchy methods, keyed by the rank of the parent.
    Map<Rank, Taxon> hierarchy = new HashMap<Rank, Taxon>();
4935
    /**
4936
     * @param taxonName
4937
     */
4938
    @SuppressWarnings("rawtypes")
4939
    public void lookForParentNode(INonViralName taxonName, Taxon tax, Reference ref, MyName myName) {
4940
        logger.info("lookForParentNode "+taxonName.getTitleCache()+" for "+myName.toString());
4941
        //System.out.println("LOOK FOR PARENT NODE "+taxonname.toString()+"; "+tax.toString()+"; "+taxonname.getRank());
4942
        INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
4943
        if (taxonName.getRank().equals(Rank.FORM())){
4944
            handleFormHierarchy(ref, myName, parser);
4945
        }
4946
        else if (taxonName.getRank().equals(Rank.VARIETY())){
4947
            handleVarietyHierarchy(ref, myName, parser);
4948
        }
4949
        else if (taxonName.getRank().equals(Rank.SUBSPECIES())){
4950
            handleSubSpeciesHierarchy(ref, myName, parser);
4951
        }
4952
        else if (taxonName.getRank().equals(Rank.SPECIES())){
4953
            handleSpeciesHierarchy(ref, myName, parser);
4954
        }
4955
        else if (taxonName.getRank().equals(Rank.SUBGENUS())){
4956
            handleSubgenusHierarchy(ref, myName, parser);
4957
        }
4958

    
4959
        if (taxonName.getRank().equals(Rank.GENUS())){
4960
            handleGenusHierarchy(ref, myName, parser);
4961
        }
4962
        if (taxonName.getRank().equals(Rank.SUBTRIBE())){
4963
            handleSubtribeHierarchy(ref, myName, parser);
4964
        }
4965
        if (taxonName.getRank().equals(Rank.TRIBE())){
4966
            handleTribeHierarchy(ref, myName, parser);
4967
        }
4968

    
4969
        if (taxonName.getRank().equals(Rank.SUBFAMILY())){
4970
            handleSubfamilyHierarchy(ref, myName, parser);
4971
        }
4972
    }
4973

    
4974
    /**
4975
     * @param ref
4976
     * @param myName
4977
     * @param parser
4978
     */
4979
    private void handleSubfamilyHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
4980
        System.out.println("handleSubfamilyHierarchy");
4981
        String parentStr = myName.getFamilyStr();
4982
        Rank r = Rank.FAMILY();
4983
        if(parentStr!=null){
4984

    
4985
            Taxon parent = null;
4986
            Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitleWithRestrictions(TaxonBase.class, parentStr, MatchMode.BEGINNING, null, null, null, null, null);
4987
            for(TaxonBase tb:taxontest.getRecords()){
4988
                try {
4989
                    if (tb.getName().getRank().equals(r)) {
4990
                        parent=CdmBase.deproxy(tb, Taxon.class);
4991
                    }
4992
                    break;
4993
                } catch (Exception e) {
4994
                    // TODO Auto-generated catch block
4995
                    e.printStackTrace();
4996
                }
4997
            }
4998
            if(parent == null) {
4999
                INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5000
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5001
                if(tmp ==null)
5002
                {
5003
                    parent=Taxon.NewInstance(parentNameName, ref);
5004
                    importer.getTaxonService().save(parent);
5005
                    parent = CdmBase.deproxy(parent, Taxon.class);
5006
                } else {
5007
                    parent=tmp;
5008
                }
5009
                lookForParentNode(parentNameName, parent, ref,myName);
5010

    
5011
            }
5012
            hierarchy.put(r,parent);
5013
        }
5014
    }
5015

    
5016
    /**
5017
     * @param ref
5018
     * @param myName
5019
     * @param parser
5020
     */
5021
    private void handleTribeHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5022
        String parentStr = myName.getSubfamilyStr();
5023
        Rank r = Rank.SUBFAMILY();
5024
        if (parentStr == null){
5025
            parentStr = myName.getFamilyStr();
5026
            r = Rank.FAMILY();
5027
        }
5028
        if(parentStr!=null){
5029
            INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5030
            Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5031
            //                    importer.getTaxonService().save(parent);
5032
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5033

    
5034
            boolean parentDoesNotExists = true;
5035
            for (TaxonNode p : classification.getAllNodes()){
5036
                if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5037
                    parentDoesNotExists = false;
5038
                    parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5039
                    break;
5040
                }
5041
            }
5042
            //                if(parentDoesNotExists) {
5043
            //                    importer.getTaxonService().save(parent);
5044
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5045
            //                    lookForParentNode(parentNameName, parent, ref,myName);
5046
            //                }
5047
            if(parentDoesNotExists) {
5048
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5049
                if(tmp ==null)
5050
                {
5051
                    parent=Taxon.NewInstance(parentNameName, ref);
5052
                    importer.getTaxonService().save(parent);
5053
                    parent = CdmBase.deproxy(parent, Taxon.class);
5054
                } else {
5055
                    parent=tmp;
5056
                }
5057
                lookForParentNode(parentNameName, parent, ref,myName);
5058

    
5059
            }
5060
            hierarchy.put(r,parent);
5061
        }
5062
    }
5063

    
5064
    /**
5065
     * @param ref
5066
     * @param myName
5067
     * @param parser
5068
     */
5069
    private void handleSubtribeHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5070
        String parentStr = myName.getTribeStr();
5071
        Rank r = Rank.TRIBE();
5072
        if (parentStr == null){
5073
            parentStr = myName.getSubfamilyStr();
5074
            r = Rank.SUBFAMILY();
5075
        }
5076
        if (parentStr == null){
5077
            parentStr = myName.getFamilyStr();
5078
            r = Rank.FAMILY();
5079
        }
5080
        if(parentStr!=null){
5081
            INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5082
            Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5083
            //                    importer.getTaxonService().save(parent);
5084
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5085

    
5086
            boolean parentDoesNotExists = true;
5087
            for (TaxonNode p : classification.getAllNodes()){
5088
                if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5089
                    parentDoesNotExists = false;
5090
                    parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5091

    
5092
                    break;
5093
                }
5094
            }
5095
            //                if(parentDoesNotExists) {
5096
            //                    importer.getTaxonService().save(parent);
5097
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5098
            //                    lookForParentNode(parentNameName, parent, ref,myName);
5099
            //                }
5100
            if(parentDoesNotExists) {
5101
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5102
                if(tmp ==null)
5103
                {
5104
                    parent=Taxon.NewInstance(parentNameName, ref);
5105
                    importer.getTaxonService().save(parent);
5106
                    parent = CdmBase.deproxy(parent, Taxon.class);
5107
                } else {
5108
                    parent=tmp;
5109
                }
5110
                lookForParentNode(parentNameName, parent, ref,myName);
5111

    
5112
            }
5113
            hierarchy.put(r,parent);
5114
        }
5115
    }
5116

    
5117
    /**
5118
     * @param ref
5119
     * @param myName
5120
     * @param parser
5121
     */
5122
    private void handleGenusHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5123
        String parentStr = myName.getSubtribeStr();
5124
        Rank r = Rank.SUBTRIBE();
5125
        if (parentStr == null){
5126
            parentStr = myName.getTribeStr();
5127
            r = Rank.TRIBE();
5128
        }
5129
        if (parentStr == null){
5130
            parentStr = myName.getSubfamilyStr();
5131
            r = Rank.SUBFAMILY();
5132
        }
5133
        if (parentStr == null){
5134
            parentStr = myName.getFamilyStr();
5135
            r = Rank.FAMILY();
5136
        }
5137
        if(parentStr!=null){
5138
            INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5139
            Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5140
            //                    importer.getTaxonService().save(parent);
5141
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5142

    
5143
            boolean parentDoesNotExist = true;
5144
            for (TaxonNode p : classification.getAllNodes()){
5145
                if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5146
                    //                        System.out.println(p.getTaxon().getUuid());
5147
                    //                        System.out.println(parent.getUuid());
5148
                    parentDoesNotExist = false;
5149
                    parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5150
                    break;
5151
                }
5152
            }
5153
            //                if(parentDoesNotExists) {
5154
            //                    importer.getTaxonService().save(parent);
5155
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5156
            //                    lookForParentNode(parentNameName, parent, ref,myName);
5157
            //                }
5158
            if(parentDoesNotExist) {
5159
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5160
                if(tmp ==null){
5161

    
5162
                    parent=Taxon.NewInstance(parentNameName, ref);
5163
                    importer.getTaxonService().save(parent);
5164
                    parent = CdmBase.deproxy(parent, Taxon.class);
5165
                } else {
5166
                    parent=tmp;
5167
                }
5168
                lookForParentNode(parentNameName, parent, ref,myName);
5169

    
5170
            }
5171
            hierarchy.put(r,parent);
5172
        }
5173
    }
5174

    
5175
    /**
5176
     * @param ref
5177
     * @param myName
5178
     * @param parser
5179
     */
5180
    private void handleSubgenusHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5181
        String parentStr = myName.getGenusStr();
5182
        Rank r = Rank.GENUS();
5183

    
5184
        if(parentStr==null){
5185
            parentStr = myName.getSubtribeStr();
5186
            r = Rank.SUBTRIBE();
5187
        }
5188
        if (parentStr == null){
5189
            parentStr = myName.getTribeStr();
5190
            r = Rank.TRIBE();
5191
        }
5192
        if (parentStr == null){
5193
            parentStr = myName.getSubfamilyStr();
5194
            r = Rank.SUBFAMILY();
5195
        }
5196
        if (parentStr == null){
5197
            parentStr = myName.getFamilyStr();
5198
            r = Rank.FAMILY();
5199
        }
5200
        if(parentStr!=null){
5201
            INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5202
            Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5203
            //                    importer.getTaxonService().save(parent);
5204
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5205

    
5206
            boolean parentDoesNotExists = true;
5207
            for (TaxonNode p : classification.getAllNodes()){
5208
                if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5209
                    //                        System.out.println(p.getTaxon().getUuid());
5210
                    //                        System.out.println(parent.getUuid());
5211
                    parentDoesNotExists = false;
5212
                    parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5213
                    break;
5214
                }
5215
            }
5216
            //                if(parentDoesNotExists) {
5217
            //                    importer.getTaxonService().save(parent);
5218
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5219
            //                    lookForParentNode(parentNameName, parent, ref,myName);
5220
            //                }
5221
            if(parentDoesNotExists) {
5222
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5223
                if(tmp ==null)
5224
                {
5225
                    parent=Taxon.NewInstance(parentNameName, ref);
5226
                    importer.getTaxonService().save(parent);
5227
                    parent = CdmBase.deproxy(parent, Taxon.class);
5228
                } else {
5229
                    parent=tmp;
5230
                }
5231
                lookForParentNode(parentNameName, parent, ref,myName);
5232

    
5233
            }
5234
            hierarchy.put(r,parent);
5235
        }
5236
    }
5237

    
5238
    /**
5239
     * @param ref
5240
     * @param myName
5241
     * @param parser
5242
     */
5243
    private void handleSpeciesHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5244
        String parentStr = myName.getSubgenusStr();
5245
        Rank r = Rank.SUBGENUS();
5246

    
5247
        if(parentStr==null){
5248
            parentStr = myName.getGenusStr();
5249
            r = Rank.GENUS();
5250
        }
5251

    
5252
        if(parentStr==null){
5253
            parentStr = myName.getSubtribeStr();
5254
            r = Rank.SUBTRIBE();
5255
        }
5256
        if (parentStr == null){
5257
            parentStr = myName.getTribeStr();
5258
            r = Rank.TRIBE();
5259
        }
5260
        if (parentStr == null){
5261
            parentStr = myName.getSubfamilyStr();
5262
            r = Rank.SUBFAMILY();
5263
        }
5264
        if (parentStr == null){
5265
            parentStr = myName.getFamilyStr();
5266
            r = Rank.FAMILY();
5267
        }
5268
        if(parentStr!=null){
5269
            Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5270
            //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5271
            hierarchy.put(r,parent);
5272
        }
5273
    }
5274

    
5275
    /**
5276
     * @param ref
5277
     * @param myName
5278
     * @param parser
5279
     */
5280
    private void handleSubSpeciesHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5281
        String parentStr = myName.getSpeciesStr();
5282
        Rank r = Rank.SPECIES();
5283

    
5284

    
5285
        if(parentStr==null){
5286
            parentStr = myName.getSubgenusStr();
5287
            r = Rank.SUBGENUS();
5288
        }
5289

    
5290
        if(parentStr==null){
5291
            parentStr = myName.getGenusStr();
5292
            r = Rank.GENUS();
5293
        }
5294

    
5295
        if(parentStr==null){
5296
            parentStr = myName.getSubtribeStr();
5297
            r = Rank.SUBTRIBE();
5298
        }
5299
        if (parentStr == null){
5300
            parentStr = myName.getTribeStr();
5301
            r = Rank.TRIBE();
5302
        }
5303
        if (parentStr == null){
5304
            parentStr = myName.getSubfamilyStr();
5305
            r = Rank.SUBFAMILY();
5306
        }
5307
        if (parentStr == null){
5308
            parentStr = myName.getFamilyStr();
5309
            r = Rank.FAMILY();
5310
        }
5311
        if(parentStr!=null){
5312
            Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5313
            //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5314
            hierarchy.put(r,parent);
5315
        }
5316
    }
5317

    
5318

    
5319
    /**
5320
     * @param ref
5321
     * @param myName
5322
     * @param parser
5323
     */
5324
    private void handleFormHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5325
        String parentStr = myName.getSubspeciesStr();
5326
        Rank r = Rank.SUBSPECIES();
5327

    
5328

    
5329
        if(parentStr==null){
5330
            parentStr = myName.getSpeciesStr();
5331
            r = Rank.SPECIES();
5332
        }
5333

    
5334
        if(parentStr==null){
5335
            parentStr = myName.getSubgenusStr();
5336
            r = Rank.SUBGENUS();
5337
        }
5338

    
5339
        if(parentStr==null){
5340
            parentStr = myName.getGenusStr();
5341
            r = Rank.GENUS();
5342
        }
5343

    
5344
        if(parentStr==null){
5345
            parentStr = myName.getSubtribeStr();
5346
            r = Rank.SUBTRIBE();
5347
        }
5348
        if (parentStr == null){
5349
            parentStr = myName.getTribeStr();
5350
            r = Rank.TRIBE();
5351
        }
5352
        if (parentStr == null){
5353
            parentStr = myName.getSubfamilyStr();
5354
            r = Rank.SUBFAMILY();
5355
        }
5356
        if (parentStr == null){
5357
            parentStr = myName.getFamilyStr();
5358
            r = Rank.FAMILY();
5359
        }
5360
        if(parentStr!=null){
5361
            Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5362
            //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5363
            hierarchy.put(r,parent);
5364
        }
5365
    }
5366

    
5367
    /**
5368
     * @param ref
5369
     * @param myName
5370
     * @param parser
5371
     */
5372
    private void handleVarietyHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5373
        String parentStr = myName.getSubspeciesStr();
5374
        Rank r = Rank.SUBSPECIES();
5375

    
5376
        if(parentStr==null){
5377
            parentStr = myName.getSpeciesStr();
5378
            r = Rank.SPECIES();
5379
        }
5380

    
5381
        if(parentStr==null){
5382
            parentStr = myName.getSubgenusStr();
5383
            r = Rank.SUBGENUS();
5384
        }
5385

    
5386
        if(parentStr==null){
5387
            parentStr = myName.getGenusStr();
5388
            r = Rank.GENUS();
5389
        }
5390

    
5391
        if(parentStr==null){
5392
            parentStr = myName.getSubtribeStr();
5393
            r = Rank.SUBTRIBE();
5394
        }
5395
        if (parentStr == null){
5396
            parentStr = myName.getTribeStr();
5397
            r = Rank.TRIBE();
5398
        }
5399
        if (parentStr == null){
5400
            parentStr = myName.getSubfamilyStr();
5401
            r = Rank.SUBFAMILY();
5402
        }
5403
        if (parentStr == null){
5404
            parentStr = myName.getFamilyStr();
5405
            r = Rank.FAMILY();
5406
        }
5407
        if(parentStr!=null){
5408
            Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5409
            //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5410
            hierarchy.put(r,parent);
5411
        }
5412
    }
5413

    
5414
    /**
5415
     * @param ref
5416
     * @param myName
5417
     * @param parser
5418
     * @param parentStr
5419
     * @param r
5420
     * @return
5421
     */
5422
    private Taxon handleParentName(Reference ref, MyName myName, INonViralNameParser<?> parser, String parentStr, Rank r) {
5423
        INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5424
        Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5425
        //                    importer.getTaxonService().save(parent);
5426
        //                    parent = CdmBase.deproxy(parent, Taxon.class);
5427

    
5428
        boolean parentDoesNotExists = true;
5429
        for (TaxonNode p : classification.getAllNodes()){
5430
            if(p.getTaxon().getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(parent.getTitleCache().split("sec.")[0].trim())) {
5431
                //                        System.out.println(p.getTaxon().getUuid());
5432
                //                        System.out.println(parent.getUuid());
5433
                parentDoesNotExists = false;
5434
                parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5435
                break;
5436
            }
5437
        }
5438
        if(parentDoesNotExists) {
5439
            Taxon tmp = findMatchingTaxon(parentNameName,ref);
5440
            //                    System.out.println("FOUND PARENT "+tmp.toString()+" for "+parentNameName.toString());
5441
            if(tmp ==null){
5442

    
5443
                parent=Taxon.NewInstance(parentNameName, ref);
5444
                importer.getTaxonService().save(parent);
5445

    
5446
            } else {
5447
                parent=tmp;
5448
            }
5449
            lookForParentNode(parentNameName, parent, ref,myName);
5450

    
5451
        }
5452
        return parent;
5453
    }
5454

    
5455
    private void addNameDifferenceToFile(String originalname, String atomisedname){
5456
        try{
5457
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NamesDifferent_"+classification.getTitleCache()+".txt",true);
5458
            BufferedWriter out = new BufferedWriter(fstream);
5459
            out.write(originalname+" (original) versus "+replaceNull(atomisedname)+" (atomised) \n");
5460
            //Close the output stream
5461
            out.close();
5462
        }catch (Exception e){//Catch exception if any
5463
            System.err.println("Error: " + e.getMessage());
5464
        }
5465
    }
5466
    /**
5467
     * @param name
5468
     * @param author
5469
     * @param nomenclaturalCode2
5470
     * @param rank
5471
     */
5472
    private void addProblemNameToFile(String name, String author, NomenclaturalCode nomenclaturalCode2, Rank rank) {
5473
        try{
5474
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NameNotParsed.txt",true);
5475
            BufferedWriter out = new BufferedWriter(fstream);
5476
            out.write(name+"\t"+replaceNull(author)+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\n");
5477
            //Close the output stream
5478
            out.close();
5479
        }catch (Exception e){//Catch exception if any
5480
            System.err.println("Error: " + e.getMessage());
5481
        }
5482
    }
5483

    
5484

    
5485
    /**
5486
     * @param tnb
5487
     * @param bestMatchingTaxon
5488
     * @param insertAsExisting
5489
     * @param refMods
5490
     */
5491
    private void logDecision(INonViralName tnb, Taxon bestMatchingTaxon, boolean insertAsExisting, Reference refMods) {
5492
        try{
5493
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "Decisions_"+classification.toString()+".txt", true);
5494
            BufferedWriter out = new BufferedWriter(fstream);
5495
            out.write(tnb.getTitleCache() + " sec. " + refMods + "\t" + bestMatchingTaxon.getTitleCache() + "\t" + insertAsExisting + "\n");
5496
            //Close the output stream
5497
            out.close();
5498
        }catch (Exception e){//Catch exception if any
5499
            System.err.println("Error: " + e.getMessage());
5500
        }
5501
    }
5502

    
5503

    
5504
    @SuppressWarnings("unused")
5505
    private String replaceNull(Object in){
5506
        if (in == null) {
5507
            return "";
5508
        }
5509
        if (in.getClass().equals(NomenclaturalCode.class)) {
5510
            return ((NomenclaturalCode)in).getTitleCache();
5511
        }
5512
        return in.toString();
5513
    }
5514

    
5515
    /**
5516
     * @param fullName
5517
     * @param nomenclaturalCode2
5518
     * @param rank
5519
     */
5520
    private void addProblemNameToFile(String type, String name, NomenclaturalCode nomenclaturalCode2, Rank rank, String problems) {
5521
        try{
5522
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NameNotParsed_"+classification.getTitleCache()+".txt",true);
5523
            BufferedWriter out = new BufferedWriter(fstream);
5524
            out.write(type+"\t"+name+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\t"+problems+"\n");
5525
            //Close the output stream
5526
            out.close();
5527
        }catch (Exception e){//Catch exception if any
5528
            System.err.println("Error: " + e.getMessage());
5529
        }
5530

    
5531
    }
5532

    
5533
}
5534

    
5535

    
5536

    
(8-8/9)