Project

General

Profile

Download (236 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
 * Copyright (C) 2013 EDIT
3
 * European Distributed Institute of Taxonomy
4
 * http://www.e-taxonomy.eu
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9
package eu.etaxonomy.cdm.io.taxonx2013;
10

    
11
import java.io.BufferedWriter;
12
import java.io.File;
13
import java.io.FileWriter;
14
import java.io.IOException;
15
import java.net.URI;
16
import java.util.ArrayList;
17
import java.util.Arrays;
18
import java.util.HashMap;
19
import java.util.List;
20
import java.util.Map;
21
import java.util.Set;
22
import java.util.UUID;
23
import java.util.regex.Matcher;
24
import java.util.regex.Pattern;
25

    
26
import javax.xml.transform.TransformerException;
27
import javax.xml.transform.TransformerFactoryConfigurationError;
28

    
29
import org.apache.commons.lang.StringUtils;
30
import org.apache.log4j.Logger;
31
import org.w3c.dom.Node;
32
import org.w3c.dom.NodeList;
33

    
34
import com.ibm.lsid.MalformedLSIDException;
35

    
36
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
37
import eu.etaxonomy.cdm.api.service.pager.Pager;
38
import eu.etaxonomy.cdm.model.agent.AgentBase;
39
import eu.etaxonomy.cdm.model.agent.Person;
40
import eu.etaxonomy.cdm.model.common.CdmBase;
41
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
42
import eu.etaxonomy.cdm.model.common.LSID;
43
import eu.etaxonomy.cdm.model.common.Language;
44
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
45
import eu.etaxonomy.cdm.model.description.Feature;
46
import eu.etaxonomy.cdm.model.description.FeatureNode;
47
import eu.etaxonomy.cdm.model.description.FeatureTree;
48
import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
49
import eu.etaxonomy.cdm.model.description.TaxonDescription;
50
import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
51
import eu.etaxonomy.cdm.model.description.TextData;
52
import eu.etaxonomy.cdm.model.name.BacterialName;
53
import eu.etaxonomy.cdm.model.name.BotanicalName;
54
import eu.etaxonomy.cdm.model.name.INonViralName;
55
import eu.etaxonomy.cdm.model.name.ITaxonNameBase;
56
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
57
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
58
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
59
import eu.etaxonomy.cdm.model.name.NonViralName;
60
import eu.etaxonomy.cdm.model.name.Rank;
61
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
62
import eu.etaxonomy.cdm.model.name.ZoologicalName;
63
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
64
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
65
import eu.etaxonomy.cdm.model.reference.Reference;
66
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
67
import eu.etaxonomy.cdm.model.taxon.Classification;
68
import eu.etaxonomy.cdm.model.taxon.Synonym;
69
import eu.etaxonomy.cdm.model.taxon.SynonymType;
70
import eu.etaxonomy.cdm.model.taxon.Taxon;
71
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
72
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
73
import eu.etaxonomy.cdm.persistence.dto.UuidAndTitleCache;
74
import eu.etaxonomy.cdm.persistence.query.MatchMode;
75
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
76
import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
77
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
78
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImplRegExBase;
79

    
80
/**
81
 * @author pkelbert
82
 * @date 2 avr. 2013
83
 *
84
 */
85
public class TaxonXTreatmentExtractor extends TaxonXExtractor{
86

    
87
    private static final String PUBLICATION_YEAR = "publicationYear";
88

    
89
	private static final Logger logger = Logger.getLogger(TaxonXTreatmentExtractor.class);
90

    
91
    private static final String notMarkedUp = "Not marked-up";
92
    private static final UUID proIbioTreeUUID = UUID.fromString("2c49f506-c7f7-44de-a8b9-2e695de3769c");
93
    private static final UUID OtherUUID = UUID.fromString("6465f8aa-2175-446f-807e-7163994b120f");
94
    private static final UUID NotMarkedUpUUID = UUID.fromString("796fe3a5-2c9c-4a89-b298-7598ca944063");
95
    private static final boolean skippQuestion = true;
96

    
97
    private final NomenclaturalCode nomenclaturalCode;
98
    private Classification classification;
99

    
100
    private  String treatmentMainName,originalTreatmentName;
101

    
102
    private final HashMap<String,Map<String,String>> namesMap = new HashMap<String, Map<String,String>>();
103

    
104

    
105
    private final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
106
    private final Pattern keypatternend = Pattern.compile("^.+?\\d$");
107

    
108
    private boolean maxRankRespected =false;
109
    private Map<String, Feature> featuresMap;
110

    
111
    private MyName currentMyName;
112

    
113
    private Reference sourceUrlRef;
114

    
115
    private String followingText;  //text element immediately following a tax:name in tax:nomenclature TODO move do state
116
    private String usedFollowingTextPrefix; //the part of the following text which has been used during taxon name creation
117

    
118
    private final TaxonXAddSources sourceHandler = new TaxonXAddSources();
119

    
120
    /**
     * Creates a treatment extractor and wires up its source handler.
     *
     * @param nomenclaturalCode the nomenclatural code kept by this extractor
     * @param classification the classification kept by this extractor
     * @param importer the importer giving access to the services; also passed to the source handler
     * @param configState the import state; also passed to the collector preparation and to the source handler
     * @param featuresMap the initial map of features
     * @param urlSource the reference kept as source URL reference; also passed to the source handler
     */
    public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification, TaxonXImport importer,
            TaxonXImportState configState,Map<String, Feature> featuresMap,  Reference urlSource) {
        // own state first ...
        this.nomenclaturalCode = nomenclaturalCode;
        this.classification = classification;
        this.featuresMap = featuresMap;
        this.sourceUrlRef = urlSource;
        this.importer = importer;
        this.state2 = configState;

        // ... then the collaborators that depend on it
        prepareCollectors(configState, importer.getAgentService());

        sourceHandler.setSourceUrlRef(urlSource);
        sourceHandler.setImporter(importer);
        sourceHandler.setConfigState(configState);
    }
139

    
140
    /**
141
     * extracts all the treament information and save them
142
     * @param treatmentnode: the XML Node
143
     * @param tosave: the list of object to save into the CDM
144
     * @param refMods: the reference extracted from the MODS
145
     * @param sourceName: the URI of the document
146
     */
147
    @SuppressWarnings({ "rawtypes", "unused" })
148

    
149
    protected void extractTreatment(Node treatmentnode, Reference refMods, URI sourceName) {        logger.info("extractTreatment");
150
        List<TaxonNameBase> namesToSave = new ArrayList<TaxonNameBase>();
151
        NodeList children = treatmentnode.getChildNodes();
152
        Taxon acceptedTaxon =null;
153
        boolean hasRefgroup=false;
154

    
155
        //needed?
156
        for (int i=0;i<children.getLength();i++){
157
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group")) {
158
            	hasRefgroup=true;
159
            }
160
        }
161

    
162
        for (int i=0;i<children.getLength();i++){
163
        	Node child = children.item(i);
164
    		acceptedTaxon = handleSingleNode(refMods, sourceName, namesToSave, child, acceptedTaxon);
165
        }
166
        //        logger.info("saveUpdateNames");
167
        if (maxRankRespected){
168
            importer.getNameService().saveOrUpdate(namesToSave);
169
            importer.getClassificationService().saveOrUpdate(classification);
170
            //logger.info("saveUpdateNames-ok");
171
        }
172

    
173
        buildFeatureTree();
174
    }
175

    
176
	private Taxon handleSingleNode(Reference refMods, URI sourceName,
177
			List<TaxonNameBase> namesToSave, Node child, Taxon acceptedTaxon) {
178
		Taxon defaultTaxon =null;
179

    
180
		String nodeName = child.getNodeName();
181
		if (nodeName.equalsIgnoreCase("tax:nomenclature")){
182
		    NodeList nomenclatureChildren = child.getChildNodes();
183
		    boolean containsName = false;
184
		    for(int k=0; k<nomenclatureChildren.getLength(); k++){
185
		        if(nomenclatureChildren.item(k).getNodeName().equalsIgnoreCase("tax:name")){
186
		            containsName=true;
187
		            break;
188
		        }
189
		    }
190
		    if (containsName){
191
		        reloadClassification();
192
		        //extract "main" the scientific name
193
		        try{
194
		            acceptedTaxon = extractNomenclature(child, namesToSave, refMods);
195
		        }catch(ClassCastException e){
196
		        	//FIXME exception handling
197
		        	e.printStackTrace();
198
		        }
199
		        //                    System.out.println("acceptedTaxon : "+acceptedTaxon);
200
		    }
201
		}else if (nodeName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
202
		    reloadClassification();
203
		    //extract the References within the document
204
		    extractReferences(child, namesToSave ,acceptedTaxon,refMods);
205
		}else if (nodeName.equalsIgnoreCase("tax:div") &&
206
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected){
207
		    File file = new File(TaxonXImport.LOG_FOLDER + "multipleTaxonX.txt");
208
		    FileWriter writer;
209
		    try {
210
		        writer = new FileWriter(file ,true);
211
		        writer.write(sourceName+"\n");
212
		        writer.flush();
213
		        writer.close();
214
		    } catch (IOException e1) {
215
		        // TODO Auto-generated catch block
216
		        logger.error(e1.getMessage());
217
		    }
218
		    //                String multiple = askMultiple(children.item(i));
219
		    String multiple = "Other";
220
		    if (multiple.equalsIgnoreCase("other")) {
221
		        extractSpecificFeatureNotStructured(child,acceptedTaxon, defaultTaxon,namesToSave, refMods,multiple);
222
		    }else if (multiple.equalsIgnoreCase("synonyms")) {
223
		        try{
224
		            extractSynonyms(child,acceptedTaxon, refMods, null);
225
		        }catch(NullPointerException e){
226
		            logger.warn("the accepted taxon is maybe null");
227
		        }
228
		    }else if(multiple.equalsIgnoreCase("material examined")){
229
		    	extractMaterials(child, acceptedTaxon, refMods, namesToSave);
230
		    }else if (multiple.equalsIgnoreCase("distribution")){
231
		    	extractDistribution(child, acceptedTaxon, defaultTaxon, namesToSave, refMods);
232
		    }else if (multiple.equalsIgnoreCase("type status")){
233
		    	extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, "TypeStatus");
234
		    }else if (multiple.equalsIgnoreCase("vernacular name")){
235
		    	extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
236
		    }else{
237
		    	extractSpecificFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,multiple);
238
		    }
239
		}
240
		else if(nodeName.equalsIgnoreCase("tax:div") &&
241
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected){
242
		    extractFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, Feature.BIOLOGY_ECOLOGY());
243
		}
244
		else if(nodeName.equalsIgnoreCase("tax:div") &&
245
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("vernacularName") && maxRankRespected){
246
		    extractDescriptionWithReference(child, acceptedTaxon,defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
247
		}
248
		else if(nodeName.equalsIgnoreCase("tax:div") &&
249
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected){
250
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
251
		}
252
		else if(nodeName.equalsIgnoreCase("tax:div") &&
253
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected){
254
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,Feature.DIAGNOSIS());
255
		}
256
		else if(nodeName.equalsIgnoreCase("tax:div") &&
257
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected){
258
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DISCUSSION());
259
		}
260
		else if(nodeName.equalsIgnoreCase("tax:div") &&
261
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("note") && maxRankRespected){
262
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
263
		}
264
		else if(nodeName.equalsIgnoreCase("tax:div") &&
265
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected){
266
		    extractDistribution(child,acceptedTaxon,defaultTaxon,namesToSave, refMods);
267
		}
268
		else if(nodeName.equalsIgnoreCase("tax:div") &&
269
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected){
270
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave,refMods,Feature.ETYMOLOGY());
271
		}
272
		else if(nodeName.equalsIgnoreCase("tax:div") &&
273
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected){
274
		    extractMaterials(child,acceptedTaxon, refMods, namesToSave);
275
		}
276
		else if(nodeName.equalsIgnoreCase("tax:figure") && maxRankRespected){
277
		    extractSpecificFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, "Figure");
278
		}
279
		else if(nodeName.equalsIgnoreCase("tax:div") &&
280
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") && maxRankRespected){
281
		    extractSpecificFeature(child, acceptedTaxon,defaultTaxon, namesToSave, refMods, "table");
282
		}else if(nodeName.equalsIgnoreCase("tax:div") &&
283
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected){
284
		    //TODO IGNORE keys for the moment
285
		    //extractKey(children.item(i),acceptedTaxon, nameToSave,source, refMods);
286
		    extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,"Keys - unparsed");
287
		}
288
		else{
289
		    if (! nodeName.equalsIgnoreCase("tax:pb")){
290
		        //logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
291
		        if (child.getAttributes() !=null) {
292
		            logger.info("First Attribute: " + child.getAttributes().item(0));
293
		        }
294
		        extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, notMarkedUp);
295
		    }else{
296
		    	//FIXME
297
		    	logger.warn("Unhandled");
298
		    }
299
		}
300
		return acceptedTaxon;
301
	}
302

    
303

    
304
    protected Map<String,Feature> getFeaturesUsed(){
305
        return featuresMap;
306
    }
307
    /**
308
     *
309
     */
310
    private void buildFeatureTree() {
311
        logger.info("buildFeatureTree");
312
        FeatureTree proibiospheretree = importer.getFeatureTreeService().find(proIbioTreeUUID);
313
        if (proibiospheretree == null){
314
            List<FeatureTree> trees = importer.getFeatureTreeService().list(FeatureTree.class, null, null, null, null);
315
            if (trees.size()==1) {
316
                FeatureTree ft = trees.get(0);
317
                if (featuresMap==null) {
318
                    featuresMap=new HashMap<String, Feature>();
319
                }
320
                for (Feature feature: ft.getDistinctFeatures()){
321
                    if(feature!=null) {
322
                        featuresMap.put(feature.getTitleCache(), feature);
323
                    }
324
                }
325
            }
326
            proibiospheretree = FeatureTree.NewInstance();
327
            proibiospheretree.setUuid(proIbioTreeUUID);
328
        }
329
        //        FeatureNode root = proibiospheretree.getRoot();
330
        FeatureNode root2 = proibiospheretree.getRoot();
331
        if (root2 != null){
332
            int nbChildren = root2.getChildCount()-1;
333
            while (nbChildren>-1){
334
                try{
335
                    root2.removeChild(nbChildren);
336
                }catch(Exception e){logger.warn("Can't remove child from FeatureTree "+e);}
337
                nbChildren --;
338
            }
339

    
340
        }
341

    
342
        for (Feature feature:featuresMap.values()) {
343
            root2.addChild(FeatureNode.NewInstance(feature));
344
        }
345
        importer.getFeatureTreeService().saveOrUpdate(proibiospheretree);
346

    
347
    }
348

    
349

    
350
    /**
351
     * @param keys
352
     * @param acceptedTaxon: the current acceptedTaxon
353
     * @param nametosave: the list of objects to save into the CDM
354
     * @param refMods: the current reference extracted from the MODS
355
     */
356
    /*   @SuppressWarnings("rawtypes")
357
    private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonNameBase> nametosave, Reference refMods) {
358
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
359

    
360
        NodeList children = keys.getChildNodes();
361
        String key="";
362
        PolytomousKey poly =  PolytomousKey.NewInstance();
363
        poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
364
        poly.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
365
        poly.addTaxonomicScope(acceptedTaxon);
366
        poly.setTitleCache("bloup", true);
367
        //        poly.addCoveredTaxon(acceptedTaxon);
368
        PolytomousKeyNode root = poly.getRoot();
369
        PolytomousKeyNode previous = null,tmpKey=null;
370
        Taxon taxonKey=null;
371
        List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
372

    
373
        //        String fullContent = keys.getTextContent();
374
        for (int i=0;i<children.getLength();i++){
375
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
376
                NodeList paragraph = children.item(i).getChildNodes();
377
                key="";
378
                taxonKey=null;
379
                for (int j=0;j<paragraph.getLength();j++){
380
                    if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
381
                        if (! paragraph.item(j).getTextContent().trim().isEmpty()){
382
                            key+=paragraph.item(j).getTextContent().trim();
383
                            //                            logger.info("KEY: "+j+"--"+key);
384
                        }
385
                    }
386
                    if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
387
                        taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
388
                    }
389
                }
390
                //                logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
391
                if (keypattern.matcher(key).matches()){
392
                    tmpKey = PolytomousKeyNode.NewInstance(key);
393
                    if (taxonKey!=null) {
394
                        tmpKey.setTaxon(taxonKey);
395
                    }
396
                    polyNodes.add(tmpKey);
397
                    if (previous == null) {
398
                        root.addChild(tmpKey);
399
                    } else {
400
                        previous.addChild(tmpKey);
401
                    }
402
                }else{
403
                    if (!key.isEmpty()){
404
                        tmpKey=PolytomousKeyNode.NewInstance(key);
405
                        if (taxonKey!=null) {
406
                            tmpKey.setTaxon(taxonKey);
407
                        }
408
                        polyNodes.add(tmpKey);
409
                        if (keypatternend.matcher(key).matches()) {
410
                            root.addChild(tmpKey);
411
                            previous=tmpKey;
412
                        } else{
413
                            previous.addChild(tmpKey);
414
                        }
415

    
416
                    }
417
                }
418
            }
419
        }
420
        importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
421
        importer.getPolytomousKeyService().saveOrUpdate(poly);
422
    }
423
*/
424

    
425

    
426
    /**
427
     * @param taxons: the XML Nodegroup
428
     * @param nametosave: the list of objects to save into the CDM
429
     * @param acceptedTaxon: the current accepted Taxon
430
     * @param refMods: the current reference extracted from the MODS
431
     *
432
     * @return Taxon object built
433
     */
434
    @SuppressWarnings({ "rawtypes", "unused" })
435
    private TaxonNameBase getTaxonNameBaseFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference refMods, boolean isSynonym) {
436
        //        logger.info("getTaxonFromXML");
437
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
438
        logger.info("getTaxonNameBaseFromXML");
439
        TaxonNameBase nameToBeFilled = null;
440

    
441
        currentMyName=new MyName(isSynonym);
442

    
443
        NomenclaturalStatusType statusType = null;
444
        try {
445
        	String followingText = null;  //needs to be checked if following text is possible
446
            currentMyName = extractScientificName(taxons,refMods, null);
447
        } catch (TransformerFactoryConfigurationError e1) {
448
            logger.warn(e1);
449
        } catch (TransformerException e1) {
450
            logger.warn(e1);
451
        }
452
        /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
453

    
454
        nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
455
        if (nameToBeFilled.hasProblem() &&
456
                !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
457
            //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
458
            addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
459
            nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
460
        }
461

    
462
        nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
463
         */
464
        nameToBeFilled = currentMyName.getTaxonNameBase();
465
        return nameToBeFilled;
466

    
467
    }
468

    
469

    
470
    /**
471
     *
472
     */
473
    private void reloadClassification() {
474
        logger.info("reloadClassification");
475
        Classification cl = importer.getClassificationService().find(classification.getUuid());
476
        if (cl != null){
477
            classification = cl;
478
        }else{
479
            importer.getClassificationService().saveOrUpdate(classification);
480
            classification = importer.getClassificationService().find(classification.getUuid());
481
        }
482
    }
483

    
484
    //    /**
485
    //     * Create a Taxon for the current NameBase, based on the current reference
486
    //     * @param taxonNameBase
487
    //     * @param refMods: the current reference extracted from the MODS
488
    //     * @return Taxon
489
    //     */
490
    //    @SuppressWarnings({ "unused", "rawtypes" })
491
    //    private Taxon getTaxon(TaxonNameBase taxonNameBase, Reference refMods) {
492
    //        Taxon t = new Taxon(taxonNameBase,null );
493
    //        if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
494
    //            t.setSec(configState.getConfig().getSecundum());
495
    //            logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
496
    //        }
497
    //        /*<<<<<<< .courant
498
    //        boolean sourceExists=false;
499
    //        Set<IdentifiableSource> sources = t.getSources();
500
    //        for (IdentifiableSource src : sources){
501
    //            String micro = src.getCitationMicroReference();
502
    //            Reference r = src.getCitation();
503
    //            if (r.equals(refMods) && micro == null) {
504
    //                sourceExists=true;
505
    //            }
506
    //        }
507
    //        if(!sourceExists) {
508
    //            t.addSource(null,null,refMods,null);
509
    //        }
510
    //=======*/
511
    //        t.addSource(OriginalSourceType.Import,null,null,refMods,null);
512
    //        t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
513
    //        return t;
514
    //    }
515

    
516
    private void  extractDescriptionWithReference(Node typestatus, Taxon acceptedTaxon, Taxon defaultTaxon, Reference refMods,
517
            String featureName) {
518
        //        System.out.println("extractDescriptionWithReference !");
519
        logger.info("extractDescriptionWithReference");
520
        NodeList children = typestatus.getChildNodes();
521

    
522
        Feature currentFeature=getFeatureObjectFromString(featureName);
523

    
524
        String r="";String s="";
525
        for (int i=0;i<children.getLength();i++){
526
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
527
                s+=children.item(i).getTextContent().trim();
528
            }
529
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
530
                r+= children.item(i).getTextContent().trim();
531
            }
532
            if (s.indexOf(r)>-1) {
533
                s=s.split(r)[0];
534
            }
535
        }
536

    
537
        Reference currentref =  ReferenceFactory.newGeneric();
538
        if(!r.isEmpty()) {
539
            currentref.setTitleCache(r, true);
540
        } else {
541
            currentref=refMods;
542
        }
543
        setParticularDescription(s,acceptedTaxon,defaultTaxon, currentref, refMods,currentFeature);
544
    }
545

    
546
    /**
547
     * @param nametosave
548
     * @param distribution: the XML node group
549
     * @param acceptedTaxon: the current accepted Taxon
550
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
551
     * @param refMods: the current reference extracted from the MODS
552
     */
553
    @SuppressWarnings("rawtypes")
554
    private void extractDistribution(Node distribution, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference refMods) {
555
        logger.info("extractDistribution");
556
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
557
        NodeList children = distribution.getChildNodes();
558
        Map<Integer,List<MySpecimenOrObservation>> specimenOrObservations = new HashMap<Integer, List<MySpecimenOrObservation>>();
559
        Map<Integer,String> descriptionsFulltext = new HashMap<Integer,String>();
560

    
561
        for (int i=0;i<children.getLength();i++){
562
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
563
                NodeList paragraph = children.item(i).getChildNodes();
564
                for (int j=0;j<paragraph.getLength();j++){
565
                    if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
566
                        extractText(descriptionsFulltext, i, paragraph.item(j));
567
                    }
568
                    else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
569
                        extractInLine(nametosave, refMods, descriptionsFulltext, i,paragraph.item(j));
570
                    }
571
                    else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")){
572
                        MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
573
                        DerivedUnit derivedUnitBase = null;
574
                        specimenOrObservation = extractSpecimenOrObservation(paragraph.item(j), derivedUnitBase, SpecimenOrObservationType.DerivedUnit, null);
575
                        extractTextFromSpecimenOrObservation(specimenOrObservations, descriptionsFulltext, i, specimenOrObservation);
576
                    }
577
                }
578
            }
579
        }
580

    
581
        int m=0;
582
        for (int k:descriptionsFulltext.keySet()) {
583
            if (k>m) {
584
                m=k;
585
            }
586
        }
587
        for (int k:specimenOrObservations.keySet()) {
588
            if (k>m) {
589
                m=k;
590
            }
591
        }
592

    
593

    
594
        if(acceptedTaxon!=null){
595
            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
596
            Feature currentFeature = Feature.DISTRIBUTION();
597
            //        DerivedUnit derivedUnitBase=null;
598
            //        String descr="";
599
            for (int k=0;k<=m;k++){
600
                if(specimenOrObservations.keySet().contains(k)){
601
                    for (MySpecimenOrObservation soo:specimenOrObservations.get(k) ) {
602
                        handleAssociation(acceptedTaxon, refMods, td, soo);
603
                    }
604
                }
605

    
606
                if (descriptionsFulltext.keySet().contains(k)){
607
                    if (!stringIsEmpty(descriptionsFulltext.get(k).trim()) && (descriptionsFulltext.get(k).startsWith("Hab.") || descriptionsFulltext.get(k).startsWith("Habitat"))){
608
                        setParticularDescription(descriptionsFulltext.get(k),acceptedTaxon,defaultTaxon, refMods, Feature.HABITAT());
609
                        break;
610
                    }
611
                    else{
612
                        handleTextData(refMods, descriptionsFulltext, td, currentFeature, k);
613
                    }
614
                }
615

    
616
                if (descriptionsFulltext.keySet().contains(k) || specimenOrObservations.keySet().contains(k)){
617
                    acceptedTaxon.addDescription(td);
618
                    sourceHandler.addAndSaveSource(refMods, td, null);
619
                    importer.getTaxonService().saveOrUpdate(acceptedTaxon);
620
                }
621
            }
622
        }
623
    }
624

    
625
    /**
626
     * @param refMods
627
     * @param descriptionsFulltext
628
     * @param td
629
     * @param currentFeature
630
     * @param k
631
     */
632
    private void handleTextData(Reference refMods, Map<Integer, String> descriptionsFulltext, TaxonDescription td,
633
            Feature currentFeature, int k) {
634
        //logger.info("handleTextData");
635
        TextData textData = TextData.NewInstance();
636
        textData.setFeature(currentFeature);
637
        textData.putText(Language.UNKNOWN_LANGUAGE(), descriptionsFulltext.get(k));
638
        sourceHandler.addSource(refMods, textData);
639
        td.addElement(textData);
640
    }
641

    
642
    /**
643
     * @param acceptedTaxon
644
     * @param refMods
645
     * @param td
646
     * @param soo
647
     */
648
    private void handleAssociation(Taxon acceptedTaxon, Reference refMods, TaxonDescription td, MySpecimenOrObservation soo) {
649
        logger.info("handleAssociation");
650
        String descr=soo.getDescr();
651
        DerivedUnit derivedUnitBase = soo.getDerivedUnitBase();
652

    
653
        sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
654

    
655
        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
656

    
657
        Feature feature=null;
658
        feature = makeFeature(derivedUnitBase);
659
        if(!StringUtils.isEmpty(descr)) {
660
            derivedUnitBase.setTitleCache(descr, true);
661
        }
662

    
663
        IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
664

    
665
        taxonDescription.addElement(indAssociation);
666
        sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
667
        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
668
        td.setDescribedSpecimenOrObservation(soo.getDerivedUnitBase());
669
    }
670

    
671
    /**
672
     * create an individualAssociation
673
     * @param refMods
674
     * @param derivedUnitBase
675
     * @param feature
676
     * @return
677
     */
678
    private IndividualsAssociation createIndividualAssociation(Reference refMods, DerivedUnit derivedUnitBase,
679
            Feature feature) {
680
        logger.info("createIndividualAssociation");
681
        IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
682
        indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
683
        indAssociation.setFeature(feature);
684
        indAssociation = sourceHandler.addSource(refMods, indAssociation);
685
        return indAssociation;
686
    }
687

    
688
    /**
689
     * @param specimenOrObservations
690
     * @param descriptionsFulltext
691
     * @param i
692
     * @param specimenOrObservation
693
     */
694
    private void extractTextFromSpecimenOrObservation(Map<Integer, List<MySpecimenOrObservation>> specimenOrObservations,
695
            Map<Integer, String> descriptionsFulltext, int i, MySpecimenOrObservation specimenOrObservation) {
696
        logger.info("extractTextFromSpecimenOrObservation");
697
        List<MySpecimenOrObservation> speObsList = specimenOrObservations.get(i);
698
        if (speObsList == null) {
699
            speObsList=new ArrayList<MySpecimenOrObservation>();
700
        }
701
        speObsList.add(specimenOrObservation);
702
        specimenOrObservations.put(i,speObsList);
703

    
704
        String s = specimenOrObservation.getDerivedUnitBase().toString();
705
        if (descriptionsFulltext.get(i) !=null){
706
            s = descriptionsFulltext.get(i)+" "+s;
707
        }
708
        descriptionsFulltext.put(i, s);
709
    }
710

    
711
    /**
712
     * Extract the text with the inline link to a taxon
713
     * @param nametosave
714
     * @param refMods
715
     * @param descriptionsFulltext
716
     * @param i
717
     * @param paragraph
718
     */
719
    @SuppressWarnings("rawtypes")
720
    private void extractInLine(List<TaxonNameBase> nametosave, Reference refMods, Map<Integer, String> descriptionsFulltext,
721
            int i, Node paragraph) {
722
        //logger.info("extractInLine");
723
        String inLine=getInlineTextForName(nametosave, refMods, paragraph);
724
        if (descriptionsFulltext.get(i) !=null){
725
            inLine = descriptionsFulltext.get(i)+inLine;
726
        }
727
        descriptionsFulltext.put(i, inLine);
728
    }
729

    
730
    /**
731
     * Extract the raw text from a Node
732
     * @param descriptionsFulltext
733
     * @param node
734
     * @param j
735
     */
736
    private void extractText(Map<Integer, String> descriptionsFulltext, int i, Node node) {
737
        //logger.info("extractText");
738
        if(!node.getTextContent().trim().isEmpty()) {
739
            String s =node.getTextContent().trim();
740
            if (descriptionsFulltext.get(i) !=null){
741
                s = descriptionsFulltext.get(i)+" "+s;
742
            }
743
            descriptionsFulltext.put(i, s);
744
        }
745
    }
746

    
747

    
748
    /**
749
     * @param materials: the XML node group
750
     * @param acceptedTaxon: the current accepted Taxon
751
     * @param refMods: the current reference extracted from the MODS
752
     */
753
    @SuppressWarnings("rawtypes")
754
    private void extractMaterials(Node materials, Taxon acceptedTaxon, Reference refMods,List<TaxonNameBase> nametosave) {
755
        logger.info("EXTRACTMATERIALS");
756
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
757
        NodeList children = materials.getChildNodes();
758
        NodeList events = null;
759
        //        String descr="";
760

    
761

    
762
        for (int i=0;i<children.getLength();i++){
763
            String rawAssociation="";
764
            boolean added=false;
765
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
766
                events = children.item(i).getChildNodes();
767
                for(int k=0;k<events.getLength();k++){
768
                    if (events.item(k).getNodeName().equalsIgnoreCase("tax:name")){
769
                        String inLine= getInlineTextForName(nametosave, refMods, events.item(k));
770
                        if(!inLine.isEmpty()) {
771
                            rawAssociation+=inLine;
772
                        }
773
                    }
774
                    if (! events.item(k).getNodeName().equalsIgnoreCase("tax:name")
775
                            && !events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
776
                        rawAssociation+= events.item(k).getTextContent().trim();
777
                    }
778
                    if(events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
779
                        if (!containsDistinctLetters(rawAssociation.replaceAll(";",""))) {
780
                            rawAssociation="no description text";
781
                        }
782
                        added=true;
783
                        handleDerivedUnitFacadeAndBase(acceptedTaxon, refMods, events.item(k), rawAssociation);
784
                    }
785
                    if (!rawAssociation.isEmpty() && !added){
786

    
787
                        Feature feature = Feature.MATERIALS_EXAMINED();
788
                        featuresMap.put(feature.getTitleCache(),feature);
789

    
790
                        TextData textData = createTextData(rawAssociation, refMods, feature);
791

    
792
                        if(! rawAssociation.isEmpty() && (acceptedTaxon!=null)){
793
                            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
794
                            td.addElement(textData);
795
                            acceptedTaxon.addDescription(td);
796
                            sourceHandler.addAndSaveSource(refMods, td, null);
797
                        }
798
                        //                        DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
799
                        //                        derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
800
                        //
801
                        //                        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
802
                        //                        acceptedTaxon.addDescription(taxonDescription);
803
                        //
804
                        //                        IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
805
                        //
806
                        //                        Feature feature = Feature.MATERIALS_EXAMINED();
807
                        //                        featuresMap.put(feature.getTitleCache(),feature);
808
                        //                        if(!StringUtils.isEmpty(rawAssociation)) {
809
                        //                            derivedUnitBase.setTitleCache(rawAssociation, true);
810
                        //                        }
811
                        //                        indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
812
                        //                        indAssociation.setFeature(feature);
813
                        //                        indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
814
                        //
815
                        //                        /*boolean sourceExists=false;
816
                        //                        Set<DescriptionElementSource> dsources = indAssociation.getSources();
817
                        //                        for (DescriptionElementSource src : dsources){
818
                        //                            String micro = src.getCitationMicroReference();
819
                        //                            Reference r = src.getCitation();
820
                        //                            if (r.equals(refMods) && micro == null) {
821
                        //                                sourceExists=true;
822
                        //                            }
823
                        //                        }
824
                        //                        if(!sourceExists) {
825
                        //                            indAssociation.addSource(null, null, refMods, null);
826
                        //                        }*/
827
                        //                        taxonDescription.addElement(indAssociation);
828
                        //                        taxonDescription.setTaxon(acceptedTaxon);
829
                        //                        taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
830
                        //
831
                        //                        /*sourceExists=false;
832
                        //                        Set<IdentifiableSource> sources = taxonDescription.getSources();
833
                        //                        for (IdentifiableSource src : sources){
834
                        //                            String micro = src.getCitationMicroReference();
835
                        //                            Reference r = src.getCitation();
836
                        //                            if (r.equals(refMods) && micro == null) {
837
                        //                                sourceExists=true;
838
                        //                            }
839
                        //                        }
840
                        //                        if(!sourceExists) {
841
                        //                            taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
842
                        //                        }*/
843
                        //
844
                        //                        importer.getDescriptionService().saveOrUpdate(taxonDescription);
845
                        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
846

    
847
                        rawAssociation="";
848
                    }
849
                }
850
            }
851
        }
852
    }
853

    
854
    /**
855
     * @param acceptedTaxon
856
     * @param refMods
857
     * @param events
858
     * @param rawAssociation
859
     * @param k
860
     */
861
    private void handleDerivedUnitFacadeAndBase(Taxon acceptedTaxon, Reference refMods, Node event,
862
            String rawAssociation) {
863
        logger.info("handleDerivedUnitFacadeAndBase");
864
        String descr;
865
        DerivedUnit derivedUnitBase;
866
        MySpecimenOrObservation myspecimenOrObservation;
867
        DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
868
        derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
869

    
870
        sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
871

    
872
        //TODO this may not always be correct, ask user
873
        TaxonNameBase<?,?> typifiableName = acceptedTaxon != null ?  acceptedTaxon.getName() : null;
874
        myspecimenOrObservation = extractSpecimenOrObservation(event,derivedUnitBase,SpecimenOrObservationType.DerivedUnit, typifiableName);
875
        derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
876
        descr=myspecimenOrObservation.getDescr();
877

    
878
        sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
879

    
880
        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
881

    
882
        Feature feature = makeFeature(derivedUnitBase);
883
        featuresMap.put(feature.getTitleCache(),feature);
884
        if(!StringUtils.isEmpty(descr)) {
885
            derivedUnitBase.setTitleCache(descr, true);
886
        }
887

    
888
        IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
889

    
890
        taxonDescription.addElement(indAssociation);
891
        sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
892
        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
893
    }
894

    
895

    
896

    
897
    /**
898
     * @param currentName
899
     * @param materials: the XML node group
900
     * @param acceptedTaxon: the current accepted Taxon
901
     * @param refMods: the current reference extracted from the MODS
902
     */
903
    private String extractMaterialsDirect(Node materials, Taxon acceptedTaxon, Reference refMods, String event, TaxonNameBase<?,?> currentName) {
904
        logger.info("extractMaterialsDirect");
905
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
906
        String descr="";
907

    
908
        DerivedUnit derivedUnitBase=null;
909
        MySpecimenOrObservation myspecimenOrObservation = extractSpecimenOrObservation(materials,derivedUnitBase, SpecimenOrObservationType.DerivedUnit, currentName);
910
        derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
911

    
912
        sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
913

    
914
        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
915

    
916
        Feature feature=null;
917
        if (event.equalsIgnoreCase("collection")){
918
            feature = makeFeature(derivedUnitBase);
919
        }
920
        else{
921
            feature = Feature.MATERIALS_EXAMINED();
922
        }
923
        featuresMap.put(feature.getTitleCache(),  feature);
924

    
925
        descr=myspecimenOrObservation.getDescr();
926
        if(!StringUtils.isEmpty(descr)) {
927
            derivedUnitBase.setTitleCache(descr, true);
928
        }
929

    
930
        IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
931

    
932
        taxonDescription.addElement(indAssociation);
933
        sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
934
        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
935

    
936
        return derivedUnitBase.getTitleCache();
937

    
938
    }
939

    
940

    
941
    /**
942
     * @param description: the XML node group
943
     * @param acceptedTaxon: the current acceptedTaxon
944
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
945
     * @param nametosave: the list of objects to save into the CDM
946
     * @param refMods: the current reference extracted from the MODS
947
     * @param featureName: the feature name
948
     */
949
    @SuppressWarnings({ "rawtypes"})
950
    private String extractSpecificFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
951
            List<TaxonNameBase> nametosave, Reference refMods, String featureName ) {
952
        logger.info("extractSpecificFeature "+featureName);
953
        //        System.out.println("GRUUUUuu");
954
        NodeList children = description.getChildNodes();
955
        NodeList insideNodes ;
956
        NodeList trNodes;
957
        //        String descr ="";
958
        String localdescr="";
959
        List<String> blabla=null;
960
        List<String> text = new ArrayList<String>();
961

    
962
        String table="<table>";
963
        String head="";
964
        String line="";
965

    
966
        Feature currentFeature=getFeatureObjectFromString(featureName);
967

    
968
        //        String fullContent = description.getTextContent();
969
        for (int i=0;i<children.getLength();i++){
970
            //            localdescr="";
971
            if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
972
                text.add(children.item(i).getTextContent().trim());
973
            }
974
            if (featureName.equalsIgnoreCase("table")){
975
                if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
976
                        children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("thead")){
977
                    head = extractTableHead(children.item(i));
978
                    table+=head;
979
                    line = extractTableLine(children.item(i));
980
                    if (!line.equalsIgnoreCase("<tr></tr>")) {
981
                        table+=line;
982
                    }
983
                }
984
                if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
985
                        children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
986
                    line = extractTableLineWithColumn(children.item(i).getChildNodes());
987
                    if(!line.equalsIgnoreCase("<tr></tr>")) {
988
                        table+=line;
989
                    }
990
                }
991
            }
992
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
993
                insideNodes=children.item(i).getChildNodes();
994
                blabla= new ArrayList<String>();
995
                for (int j=0;j<insideNodes.getLength();j++){
996
                    Node insideNode = insideNodes.item(j);
997
                	if (insideNode.getNodeName().equalsIgnoreCase("tax:name")){
998
                        String inlinetext = getInlineTextForName(nametosave, refMods, insideNode);
999
                        if (!inlinetext.isEmpty()) {
1000
                            blabla.add(inlinetext);
1001
                        }
1002
                    }
1003
                    else if (insideNode.getNodeName().equalsIgnoreCase("#text")) {
1004
                        if(!insideNode.getTextContent().trim().isEmpty()){
1005
                            blabla.add(insideNode.getTextContent().trim());
1006
                            //                            localdescr += insideNodes.item(j).getTextContent().trim();
1007
                        }
1008
                    }
1009
                }
1010
                if (!blabla.isEmpty()) {
1011
                    String blaStr = StringUtils.join(blabla," ").trim();
1012
                    if(!stringIsEmpty(blaStr)) {
1013
                        setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1014
                        text.add(blaStr);
1015
                    }
1016
                }
1017

    
1018
            }
1019
            if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1020
                if(!children.item(i).getTextContent().trim().isEmpty()){
1021
                    localdescr = children.item(i).getTextContent().trim();
1022
                    if(!stringIsEmpty(localdescr)) {
1023
                        setParticularDescription(localdescr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1024
                    }
1025
                }
1026
            }
1027
        }
1028

    
1029
        table+="</table>";
1030
        if (!table.equalsIgnoreCase("<table></table>")){
1031
            //            System.out.println("TABLE : "+table);
1032
            text.add(table);
1033
        }
1034

    
1035
        if (text !=null && !text.isEmpty()) {
1036
            return StringUtils.join(text," ");
1037
        } else {
1038
            return "";
1039
        }
1040

    
1041
    }
1042

    
1043
    /**
1044
     * @param children
1045
     * @param i
1046
     * @return
1047
     */
1048
    private String extractTableLine(Node child) {
1049
        //logger.info("extractTableLine");
1050
        String line;
1051
        line="<tr>";
1052
        if (child.getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1053
            line = extractTableLineWithColumn(child.getChildNodes());
1054
        }
1055
        line+="</tr>";
1056
        return line;
1057
    }
1058

    
1059
    /**
1060
     * @param children
1061
     * @param i
1062
     * @return
1063
     */
1064
    private String extractTableHead(Node child) {
1065
        //logger.info("extractTableHead");
1066
        String head;
1067
        String line;
1068
        head="<th>";
1069
        NodeList trNodes = child.getChildNodes();
1070
        for (int k=0;k<trNodes.getLength();k++){
1071
            if (trNodes.item(k).getNodeName().equalsIgnoreCase("tax:div")
1072
                    && trNodes.item(k).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1073
                line = extractTableLineWithColumn(trNodes.item(k).getChildNodes());
1074
                head+=line;
1075
            }
1076
        }
1077
        head+="</th>";
1078
        return head;
1079
    }
1080

    
1081
    /**
1082
     * build a html table line, with td columns
1083
     * @param tdNodes
1084
     * @return an html coded line
1085
     */
1086
    private String extractTableLineWithColumn(NodeList tdNodes) {
1087
        //logger.info("extractTableLineWithColumn");
1088
        String line;
1089
        line="<tr>";
1090
        for (int l=0;l<tdNodes.getLength();l++){
1091
            if (tdNodes.item(l).getNodeName().equalsIgnoreCase("tax:p")){
1092
                line+="<td>"+tdNodes.item(l).getTextContent()+"</td>";
1093
            }
1094
        }
1095
        line+="</tr>";
1096
        return line;
1097
    }
1098

    
1099
    /**
1100
     * @param description: the XML node group
1101
     * @param acceptedTaxon: the current acceptedTaxon
1102
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1103
     * @param nametosave: the list of objects to save into the CDM
1104
     * @param refMods: the current reference extracted from the MODS
1105
     * @param featureName: the feature name
1106
     */
1107
    @SuppressWarnings({ "unused", "rawtypes" })
1108
    private String extractSpecificFeatureNotStructured(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
1109
            List<TaxonNameBase> nameToSave, Reference refMods, String featureName ) {
1110
        logger.info("extractSpecificFeatureNotStructured " + featureName);
1111
        NodeList children = description.getChildNodes();
1112
        NodeList insideNodes ;
1113
        List<String> blabla= new ArrayList<String>();
1114

    
1115

    
1116
        Feature currentFeature = getFeatureObjectFromString(featureName);
1117

    
1118
        String fullContent = description.getTextContent();
1119
        for (int i=0;i<children.getLength();i++){
1120
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1121
                insideNodes=children.item(i).getChildNodes();
1122
                for (int j=0;j<insideNodes.getLength();j++){
1123
                    if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1124
                        String inlineText =getInlineTextForName(nameToSave, refMods, insideNodes.item(j));
1125
                        if(!inlineText.isEmpty()) {
1126
                            blabla.add(inlineText);
1127
                        }
1128
                    }
1129
                    if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1130
                        if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1131
                            blabla.add(insideNodes.item(j).getTextContent().trim());
1132
                        }
1133
                    }
1134
                }
1135
            }
1136
            if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1137
                if(!children.item(i).getTextContent().trim().isEmpty()){
1138
                    String localdescr = children.item(i).getTextContent().trim();
1139
                    if(!localdescr.isEmpty())
1140
                    {
1141
                        blabla.add(localdescr);
1142
                    }
1143
                }
1144
            }
1145
        }
1146

    
1147
        if (blabla !=null && !blabla.isEmpty()) {
1148
            String blaStr = StringUtils.join(blabla," ").trim();
1149
            if (! stringIsEmpty(blaStr)) {
1150
                setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1151
                return blaStr;
1152
            } else {
1153
                return "";
1154
            }
1155
        } else {
1156
            return "";
1157
        }
1158

    
1159
    }
1160

    
1161
    /**
1162
     * @param blaStr
1163
     * @return
1164
     */
1165
    private boolean stringIsEmpty(String blaStr) {
1166
        if (blaStr.matches("(\\.|,|;|\\.-)?")){
1167
        	return true;
1168
        }else{
1169
        	return false;
1170
        }
1171
    }
1172

    
1173
    /**
1174
     * @param nametosave
1175
     * @param refMods
1176
     * @param insideNodes
1177
     * @param blabla
1178
     * @param j
1179
     */
1180
    @SuppressWarnings({ "rawtypes" })
1181
    private String getInlineTextForName(List<TaxonNameBase> nametosave, Reference refMods, Node insideNode) {
1182
        if (true){
1183
        	NodeList children = insideNode.getChildNodes();
1184
        	String result = "";
1185
            for (int i=0;i<children.getLength();i++){
1186
            	Node nameChild = children.item(i);
1187
                if(nameChild.getNodeName().equalsIgnoreCase("#text")){
1188
                	result += nameChild.getTextContent();
1189
                }else{
1190
                	//do nothing
1191
                }
1192
            }
1193
        	return result.replace("\n", "").trim();
1194
        }else{
1195
	    	TaxonNameBase tnb = getTaxonNameBaseFromXML(insideNode, nametosave,refMods,false);
1196
	        //                        Taxon tax = getTaxonFromTxonNameBase(tnb, refMods);
1197
	        Taxon tax = currentMyName.getTaxon();
1198
	        if(tnb !=null && tax != null){
1199
	            String linkedTaxon = tnb.getTitleCache().split("sec")[0];//TODO NOT IMPLEMENTED IN THE CDM YET
1200
	            return "<cdm:taxon uuid='"+tax.getUuid()+"'>"+linkedTaxon+"</cdm:taxon>";
1201
	        }else if (tnb != null && tax == null){
1202
	        	//TODO
1203
	        	return "<cdm:taxonName uuid='" + tnb.getUuid() +"'>" + tnb.getTitleCache().split("sec")[0]  +"</cdm:taxonName>";
1204
	        }else{
1205
	        	logger.warn("Inline text has no content yet");
1206
	        }
1207
	        return "";
1208
        }
1209
    }
1210

    
1211
    /**
1212
     * @param featureName
1213
     * @return
1214
     */
1215
    @SuppressWarnings("rawtypes")
1216
    private Feature getFeatureObjectFromString(String featureName) {
1217
        logger.info("getFeatureObjectFromString");
1218
        List<Feature> features = importer.getTermService().list(Feature.class, null,null,null,null);
1219
        Feature currentFeature=null;
1220
        for (Feature feature: features){
1221
            String tmpF = feature.getTitleCache();
1222
            if (tmpF.equalsIgnoreCase(featureName)) {
1223
                currentFeature=feature;
1224
                //                System.out.println("currentFeatureFromList "+currentFeature.getUuid());
1225
            }
1226
        }
1227
        if (currentFeature == null) {
1228
            currentFeature=Feature.NewInstance(featureName, featureName, featureName);
1229
            if(featureName.equalsIgnoreCase("Other")){
1230
                currentFeature.setUuid(OtherUUID);
1231
            }
1232
            if(featureName.equalsIgnoreCase(notMarkedUp)){
1233
                currentFeature.setUuid(NotMarkedUpUUID);
1234
            }
1235
            importer.getTermService().saveOrUpdate(currentFeature);
1236
        }
1237
        return currentFeature;
1238
    }
1239

    
1240

    
1241

    
1242

    
1243
    /**
1244
     * @param children: the XML node group
1245
     * @param nametosave: the list of objects to save into the CDM
1246
     * @param acceptedTaxon: the current acceptedTaxon
1247
     * @param refMods: the current reference extracted from the MODS
1248
     * @param fullContent :the parsed XML content
1249
     * @return a list of description (text)
1250
     */
1251
    @SuppressWarnings({ "unused", "rawtypes" })
1252
    private List<String> parseParagraph(List<TaxonNameBase> namesToSave, Taxon acceptedTaxon, Reference refMods, Node paragraph, Feature feature){
1253
        logger.info("parseParagraph "+feature.toString());
1254
        List<String> fullDescription=  new ArrayList<String>();
1255
        //        String localdescr;
1256
        String descr="";
1257
        NodeList insideNodes ;
1258
        boolean collectionEvent = false;
1259
        List<Node>collectionEvents = new ArrayList<Node>();
1260

    
1261
        NodeList children = paragraph.getChildNodes();
1262

    
1263
        for (int i=0;i<children.getLength();i++){
1264
            //            localdescr="";
1265
            if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
1266
                descr += children.item(i).getTextContent().trim();
1267
            }
1268
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1269
                insideNodes=children.item(i).getChildNodes();
1270
                List<String> blabla= new ArrayList<String>();
1271
                for (int j=0;j<insideNodes.getLength();j++){
1272
                    boolean nodeKnown = false;
1273
                    //    System.out.println("insideNodes.item(j).getNodeName() : "+insideNodes.item(j).getNodeName());
1274
                    if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1275
                        String inlineText = getInlineTextForName(namesToSave, refMods, insideNodes.item(j));
1276
                        if (!inlineText.isEmpty()) {
1277
                            blabla.add(inlineText);
1278
                        }
1279
                        nodeKnown=true;
1280
                    }
1281
                    else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1282
                        if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1283
                            blabla.add(insideNodes.item(j).getTextContent().trim());
1284
                            // localdescr += insideNodes.item(j).getTextContent().trim();
1285
                        }
1286
                        nodeKnown=true;
1287
                    }
1288
                    else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:bibref")) {
1289
                        String ref = insideNodes.item(j).getTextContent().trim();
1290
                        if (ref.endsWith(";")  && ((ref.length())>1)) {
1291
                            ref=ref.substring(0, ref.length()-1)+".";
1292
                        }
1293
                        Reference reference = ReferenceFactory.newGeneric();
1294
                        reference.setTitleCache(ref, true);
1295
                        blabla.add(reference.getTitleCache());
1296
                        nodeKnown=true;
1297
                    }
1298
                    else if  (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:figure")){
1299
                        String figure = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "figure");
1300
                        blabla.add(figure);
1301
                    }
1302
                    else if(insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:div") &&
1303
                            insideNodes.item(j).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1304
                            insideNodes.item(j).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1305
                        String table = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1306
                        blabla.add(table);
1307
                    }
1308
                    else if  (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1309
                        //                        logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1310
                        String titlecache  = extractMaterialsDirect(insideNodes.item(j), acceptedTaxon, refMods, "collection", null);
1311
                        blabla.add(titlecache);
1312
                        collectionEvent=true;
1313
                        collectionEvents.add(insideNodes.item(j));
1314
                        nodeKnown=true;
1315
                    }else{
1316
                    	logger.warn("node not handled yet: " + insideNodes.item(j).getNodeName());
1317
                    }
1318

    
1319
                }
1320
                if (!StringUtils.isBlank(StringUtils.join(blabla," "))) {
1321
                    fullDescription.add(StringUtils.join(blabla," "));
1322
                }
1323
            }
1324
            if  (children.item(i).getNodeName().equalsIgnoreCase("tax:figure")){
1325
                String figure = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "Figure");
1326
                fullDescription.add(figure);
1327
            }
1328
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1329
                    children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1330
                    children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1331
                String table = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1332
                fullDescription.add(table);
1333
            }
1334
        }
1335

    
1336
        if( !stringIsEmpty(descr.trim())){
1337
            Feature currentFeature= getNotMarkedUpFeatureObject();
1338
            setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1339
        }
1340
        //        if (collectionEvent) {
1341
        //            logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1342
        //            for (Node coll:collectionEvents){
1343
        //                = extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
1344
        //            }
1345
        //        }
1346
        return fullDescription;
1347
    }
1348

    
1349

    
1350
    /**
1351
     * @param description: the XML node group
1352
     * @param acceptedTaxon: the current acceptedTaxon
1353
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1354
     * @param nametosave: the list of objects to save into the CDM
1355
     * @param refMods: the current reference extracted from the MODS
1356
     * @param feature: the feature to link the data with
1357
     */
1358
    @SuppressWarnings("rawtypes")
1359
    private void extractFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> namesToSave, Reference refMods, Feature feature){
1360
        logger.info("EXTRACT FEATURE "+feature.toString());
1361
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1362
        List<String> fullDescription= parseParagraph( namesToSave, acceptedTaxon, refMods, description,feature);
1363

    
1364
        //        System.out.println("Feature : "+feature.toString()+", "+fullDescription.toString());
1365
        if (!fullDescription.isEmpty() &&!stringIsEmpty(StringUtils.join(fullDescription,"\n").trim())) {
1366
            setParticularDescription(StringUtils.join(fullDescription,"\n").trim(),acceptedTaxon,defaultTaxon, refMods,feature);
1367
        }
1368

    
1369
    }
1370

    
1371

    
1372
    /**
1373
     * @param descr: the XML Nodegroup to parse
1374
     * @param acceptedTaxon: the current acceptedTaxon
1375
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1376
     * @param refMods: the current reference extracted from the MODS
1377
     * @param currentFeature: the feature name
1378
     * @return
1379
     */
1380
    private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon, Reference refMods, Feature currentFeature) {
1381
        logger.info("setParticularDescription " + currentFeature.getTitleCache()+", \n blabla : "+descr);
1382

    
1383
        //remove redundant feature title
1384
        String featureStr = currentFeature.getTitleCache();
1385
        if (!descr.isEmpty() && descr.toLowerCase().startsWith(featureStr.toLowerCase())){
1386
        	descr = descr.replaceAll("(?i)" + featureStr + "\\.\\s*", "");
1387
        }
1388

    
1389

    
1390
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1391
        featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1392

    
1393
        TextData textData = createTextData(descr, refMods, currentFeature);
1394

    
1395
        if(acceptedTaxon!=null){
1396
            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1397
            td.addElement(textData);
1398
            acceptedTaxon.addDescription(td);
1399

    
1400
            sourceHandler.addAndSaveSource(refMods, td, null);
1401
            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1402
        }
1403

    
1404
        if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1405
            try{
1406
                Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1407
                if (tmp!=null) {
1408
                    defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1409
                }else{
1410
                    importer.getTaxonService().saveOrUpdate(defaultTaxon);
1411
                }
1412
            }catch(Exception e){
1413
                logger.debug("TAXON EXISTS"+defaultTaxon);
1414
            }
1415

    
1416
            TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1417
            defaultTaxon.addDescription(td);
1418
            td.addElement(textData);
1419
            sourceHandler.addAndSaveSource(refMods, td, null);
1420
            importer.getTaxonService().saveOrUpdate(defaultTaxon);
1421
        }
1422
    }
1423

    
1424
    /**
1425
     * @param descr
1426
     * @param refMods
1427
     * @param currentFeature
1428
     * @return
1429
     */
1430
    private TextData createTextData(String descr, Reference refMods, Feature currentFeature) {
1431
        //logger.info("createTextData");
1432
        TextData textData = TextData.NewInstance();
1433
        textData.setFeature(currentFeature);
1434
        sourceHandler.addSource(refMods, textData);
1435

    
1436
        textData.putText(Language.UNKNOWN_LANGUAGE(), descr);
1437
        return textData;
1438
    }
1439

    
1440

    
1441

    
1442
    /**
1443
     * @param descr: the XML Nodegroup to parse
1444
     * @param acceptedTaxon: the current acceptedTaxon
1445
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1446
     * @param refMods: the current reference extracted from the MODS
1447
     * @param currentFeature: the feature name
1448
     * @return
1449
     */
1450
    private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon,Reference currentRef, Reference refMods, Feature currentFeature) {
1451
        //        System.out.println("setParticularDescriptionSPecial "+currentFeature);
1452
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
1453
        logger.info("setParticularDescription");
1454
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1455

    
1456
        featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1457
        TextData textData = createTextData(descr, refMods, currentFeature);
1458

    
1459
        if(! descr.isEmpty() && (acceptedTaxon!=null)){
1460
            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1461
            td.addElement(textData);
1462
            acceptedTaxon.addDescription(td);
1463

    
1464
            sourceHandler.addAndSaveSource(refMods, td, currentRef);
1465
            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1466
        }
1467

    
1468
        if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1469
            try{
1470
                Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1471
                if (tmp!=null) {
1472
                    defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1473
                }else{
1474
                    importer.getTaxonService().saveOrUpdate(defaultTaxon);
1475
                }
1476
            }catch(Exception e){
1477
                logger.debug("TAXON EXISTS"+defaultTaxon);
1478
            }
1479

    
1480
            TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1481
            defaultTaxon.addDescription(td);
1482
            td.addElement(textData);
1483
            sourceHandler.addAndSaveSource(currentRef, td,currentRef);
1484
            importer.getTaxonService().saveOrUpdate(defaultTaxon);
1485
        }
1486
    }
1487

    
1488

    
1489

    
1490
    /**
1491
     * @param synonyms: the XML Nodegroup to parse
1492
     * @param nametosave: the list of objects to save into the CDM
1493
     * @param acceptedTaxon: the current acceptedTaxon
1494
     * @param refMods: the current reference extracted from the MODS
1495
     */
1496
    @SuppressWarnings({ "rawtypes" })
1497
    private void extractSynonyms(Node synonymsNode, Taxon acceptedTaxon,Reference refMods, String followingText) {
1498
        logger.info("extractSynonyms");
1499
        //System.out.println("extractSynonyms for: "+acceptedTaxon);
1500
        Taxon ttmp = (Taxon) importer.getTaxonService().find(acceptedTaxon.getUuid());
1501
        if (ttmp != null) {
1502
            acceptedTaxon = CdmBase.deproxy(ttmp,Taxon.class);
1503
        }
1504
        else{
1505
            acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1506
        }
1507
        NodeList children = synonymsNode.getChildNodes();
1508
        List<MyName> names = new ArrayList<MyName>();
1509

    
1510
        if(synonymsNode.getNodeName().equalsIgnoreCase("tax:name")){
1511
            try {
1512
            	MyName myName = extractScientificNameSynonym(synonymsNode, refMods, followingText);
1513
                names.add(myName);
1514
            } catch (TransformerFactoryConfigurationError e) {
1515
                logger.warn(e);
1516
            } catch (TransformerException e) {
1517
                logger.warn(e);
1518
            }
1519
        }
1520

    
1521

    
1522
        for (int i=0;i<children.getLength();i++){
1523
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1524
                NodeList tmp = children.item(i).getChildNodes();
1525
                //                String fullContent = children.item(i).getTextContent();
1526
                for (int j=0; j< tmp.getLength();j++){
1527
                    if(tmp.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1528
                        try {
1529
                        	MyName myName = extractScientificNameSynonym(tmp.item(j),refMods, followingText);
1530
                            names.add(myName);
1531
                        } catch (TransformerFactoryConfigurationError e) {
1532
                            logger.warn(e);
1533
                        } catch (TransformerException e) {
1534
                            logger.warn(e);
1535
                        }
1536
                    }
1537
                }
1538
            }
1539
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
1540
                try {
1541
                	MyName myName = extractScientificNameSynonym(children.item(i),refMods, followingText);
1542
                    names.add(myName);
1543
                } catch (TransformerFactoryConfigurationError e) {
1544
                    logger.warn(e);
1545
                } catch (TransformerException e) {
1546
                    logger.warn(e);
1547
                }
1548

    
1549
            }
1550
        }
1551

    
1552
        for(MyName name:names){
1553
        	TaxonNameBase nameToBeFilled = name.getTaxonNameBase();
1554
            Synonym synonym = name.getSyno();
1555
            addFollowingTextToName(nameToBeFilled, followingText);
1556

    
1557
            /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1558
            nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank());
1559
            if (nameToBeFilled.hasProblem() &&
1560
                    !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1561
                //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1562
                addProblemNameToFile(name.getName(),"",nomenclaturalCode,name.getRank());
1563
                nameToBeFilled = solveNameProblem(name.getOriginalName(), name.getName(), parser,name.getAuthor(), name.getRank());
1564
            }
1565
            nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
1566
             */
1567
            if (!name.getIdentifier().isEmpty() && (name.getIdentifier().length()>2)){
1568
                setLSID(name.getIdentifier(), synonym);
1569
            }
1570

    
1571
            Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1572
            boolean synoExist = false;
1573
            for (Synonym syn: synonymsSet){
1574

    
1575
                boolean a =syn.getName().equals(synonym.getName());
1576
                boolean b = syn.getSec().equals(synonym.getSec());
1577
                if (a && b) {
1578
                    synoExist=true;
1579
                }
1580
            }
1581
            if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1582
                sourceHandler.addSource(refMods, synonym);
1583
                acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1584
            }
1585
        }
1586
        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1587
    }
1588

    
1589

    
1590
    private boolean addFollowingTextToName(TaxonNameBase nameToBeFilled, String followingText) {
1591
    	if (nameToBeFilled != null && StringUtils.isNotBlank(followingText)){
1592
    		if (! followingText.matches("\\d\\.?")){
1593

    
1594
	    		if (followingText.startsWith(",")){
1595
	    			followingText = followingText.substring(1).trim();
1596
	    		}
1597
	    		nameToBeFilled.setFullTitleCache(nameToBeFilled.getFullTitleCache()+ "," +followingText , true);
1598
    		}
1599
    		return true;
1600
    	}
1601
    	return false;
1602

    
1603
	}
1604

    
1605
	/**
1606
     * @param refgroup: the XML nodes
1607
     * @param nametosave: the list of objects to save into the CDM
1608
     * @param acceptedTaxon: the current acceptedTaxon
1609
     * @param nametosave: the list of objects to save into the CDM
1610
     * @param refMods: the current reference extracted from the MODS
1611
     * @return the acceptedTaxon (why?)
1612
     * handle cases where the bibref are inside <p> and outside
1613
     */
1614
    @SuppressWarnings({ "rawtypes" })
1615
    private Taxon extractReferences(Node refgroup, List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference refMods) {
1616
        logger.info("extractReferences");
1617
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1618

    
1619
        NodeList children = refgroup.getChildNodes();
1620
        NonViralName<?> nameToBeFilled = getNonViralNameAccNomenclature();
1621

    
1622
        ReferenceBuilder refBuild = new ReferenceBuilder(sourceHandler);
1623
        for (int i=0;i<children.getLength();i++){
1624
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
1625
                String ref = children.item(i).getTextContent().trim();
1626
                refBuild.builReference(ref, treatmentMainName, nomenclaturalCode,  acceptedTaxon, refMods);
1627
                if (!refBuild.isFoundBibref()){
1628
                    extractReferenceRawText(children.item(i).getChildNodes(), nameToBeFilled, refMods, acceptedTaxon);
1629
                }
1630
            }
1631

    
1632
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1633
                NodeList references = children.item(i).getChildNodes();
1634
                String descr="";
1635
                for (int j=0;j<references.getLength();j++){
1636
                    if(references.item(j).getNodeName().equalsIgnoreCase("tax:bibref")){
1637
                        String ref = references.item(j).getTextContent().trim();
1638
                        refBuild.builReference(ref, treatmentMainName,  nomenclaturalCode,  acceptedTaxon, refMods);
1639
                    }
1640
                    else
1641
                        if (references.item(j).getNodeName().equalsIgnoreCase("#text")
1642
                                && !references.item(j).getTextContent().trim().isEmpty()){
1643
                            descr += references.item(j).getTextContent().trim();
1644
                        }
1645

    
1646
                }
1647
                if (!refBuild.isFoundBibref()){
1648
                    //if it's not tagged, put it as row information.
1649
                    //                    extractReferenceRawText(references, nameToBeFilled, nametosave, refMods, acceptedTaxon);
1650
                    //then put it as a not markup feature if not empty
1651
                    if (!stringIsEmpty(descr.trim())){
1652
                        Feature currentFeature= getNotMarkedUpFeatureObject();
1653
                        setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1654
                    }
1655
                }
1656
            }
1657
        }
1658
        //        importer.getClassificationService().saveOrUpdate(classification);
1659
        return acceptedTaxon;
1660

    
1661
    }
1662

    
1663
    /**
1664
     * get the non viral name according to the current nomenclature
1665
     * @return
1666
     */
1667

    
1668
    private NonViralName<?> getNonViralNameAccNomenclature() {
1669
    	return (NonViralName<?>)nomenclaturalCode.getNewTaxonNameInstance(null);
1670
    }
1671

    
1672
    /**
1673
     * @return the feature object for the category "not marked up"
1674
     */
1675
    private Feature getNotMarkedUpFeatureObject() {
1676
    	// FIXME use getFeature(uuid ....)
1677
        logger.info("getNotMarkedUpFeatureObject");
1678
        Feature currentFeature = (Feature)importer.getTermService().find(NotMarkedUpUUID);
1679
        if (currentFeature == null) {
1680
            currentFeature=Feature.NewInstance(notMarkedUp, notMarkedUp, notMarkedUp);
1681
            currentFeature.setUuid(NotMarkedUpUUID);
1682
            //TODO use userDefined Feature Vocabulary
1683
            Feature.DISTRIBUTION().getVocabulary().addTerm(currentFeature);
1684
//            importer.getTermService().saveOrUpdate(currentFeature);
1685
            importer.getVocabularyService().saveOrUpdate(currentFeature.getVocabulary());
1686
        }
1687
        return currentFeature;
1688
    }
1689

    
1690
    /**
1691
     * @param references
1692
     * handle cases where the bibref are inside <p> and outside
1693
     */
1694
    @SuppressWarnings("rawtypes")
1695
    private void extractReferenceRawText(NodeList references, NonViralName<?> nameToBeFilled, Reference refMods,
1696
            Taxon acceptedTaxon) {
1697
        logger.info("extractReferenceRawText");
1698
        String refString="";
1699
        currentMyName= new MyName(true);
1700
        for (int j=0;j<references.getLength();j++){
1701
            acceptedTaxon=CdmBase.deproxy(acceptedTaxon, Taxon.class);
1702
            //no bibref tag inside
1703
            //            System.out.println("references.item(j).getNodeName()"+references.item(j).getNodeName());
1704
            if (references.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1705

    
1706
                try {
1707
                	String followingText = null;  //needs to be checked if follText is possible
1708
                	//TODO create or not create?
1709
                    currentMyName = extractScientificName(references.item(j), refMods, followingText);
1710
                } catch (TransformerFactoryConfigurationError e) {
1711
                    logger.warn(e);
1712
                } catch (TransformerException e) {
1713
                    logger.warn(e);
1714
                }
1715

    
1716
                //                name=name.trim();
1717
            }
1718
            if (references.item(j).getNodeName().equalsIgnoreCase("#text")){
1719
                refString = references.item(j).getTextContent().trim();
1720
            }
1721
            if(references.item(j).getNodeName().equalsIgnoreCase("#text") && !references.item(j).getTextContent().trim().isEmpty()){
1722
                //
1723
               if (!currentMyName.getStatus().isEmpty()){
1724
            	   String nomNovStatus = this.newNameStatus(currentMyName.getStatus());
1725
	               	if (nomNovStatus != null){
1726
	               		nameToBeFilled.setAppendedPhrase(nomNovStatus);
1727
	               	}else{
1728
	            	   try {
1729
	                        NomenclaturalStatusType  statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1730
                            nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1731
	                    } catch (UnknownCdmTypeException e) {
1732
	                        addProblematicStatusToFile(currentMyName.getStatus());
1733
	                        logger.warn("Problem with status");
1734
	                    }
1735
	               	}
1736
                }
1737

    
1738
                String fullLineRefName = references.item(j).getTextContent().trim();
1739
                int nameOrRefOrOther=2;
1740
                nameOrRefOrOther=askIfNameContained(fullLineRefName);
1741
                if (nameOrRefOrOther==0){
1742
                    TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1743
                    Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1744

    
1745
                    Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1746
                    //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1747
                    boolean synoExist = false;
1748
                    for (Synonym syn: synonymsSet){
1749
                        //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1750
                        boolean a =syn.getName().equals(synonym.getName());
1751
                        boolean b = syn.getSec().equals(synonym.getSec());
1752
                        if (a && b) {
1753
                            synoExist=true;
1754
                        }
1755
                    }
1756
                    if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1757
                        sourceHandler.addSource(refMods, synonym);
1758

    
1759
                        acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1760
                    }
1761
                }
1762

    
1763
                if (nameOrRefOrOther==1){
1764
                    Reference re = ReferenceFactory.newGeneric();
1765
                    re.setTitleCache(fullLineRefName, true);
1766

    
1767
                    /* TaxonNameBase nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
1768
                    if (nameTBF.hasProblem() &&
1769
                            !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1770
                        addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
1771
                        nameTBF=solveNameProblem(currentMyName.getName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
1772
                    }
1773
                    nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1774
                     */
1775
                    TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1776
                    Synonym synonym = Synonym.NewInstance(nameTBF, re);
1777

    
1778
                    Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1779
                    //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1780
                    boolean synoExist = false;
1781
                    for (Synonym syn: synonymsSet){
1782
                        //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1783
                        boolean a =syn.getName().equals(synonym.getName());
1784
                        boolean b = syn.getSec().equals(synonym.getSec());
1785
                        if (a && b) {
1786
                            synoExist=true;
1787
                        }
1788
                    }
1789
                    if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1790
                        sourceHandler.addSource(refMods, synonym);
1791

    
1792
                        acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1793
                    }
1794

    
1795
                }
1796

    
1797

    
1798
                if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1799
                    setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1800
                }
1801
            }
1802

    
1803
            if(!currentMyName.getName().isEmpty()){
1804
                //logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+currentMyName.getName()+"*");
1805
                if (acceptedTaxon.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(currentMyName.getName().trim())){
1806
                    Reference refS = ReferenceFactory.newGeneric();
1807
                    refS.setTitleCache(refString, true);
1808
                    //                            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1809
                    //                            acceptedTaxon.addDescription(td);
1810
                    //                            acceptedTaxon.addSource(refSource);
1811
                    //
1812
                    //                            TextData textData = TextData.NewInstance(Feature.CITATION());
1813
                    //
1814
                    //                            textData.addSource(null, null, refS, null);
1815
                    //                            td.addElement(textData);
1816
                    //                            td.addSource(refSource);
1817
                    //                            importer.getDescriptionService().saveOrUpdate(td);
1818

    
1819

    
1820
                    if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1821
                        setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1822

    
1823
                    }
1824

    
1825
                    acceptedTaxon.getName().setNomenclaturalReference(refS);
1826
                }else{
1827
                    TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1828
                    Synonym synonym = null;
1829
                    if (! currentMyName.getStatus().isEmpty()){
1830
                    	String nomNovStatus = this.newNameStatus(currentMyName.getStatus());
1831
                    	if (nomNovStatus != null){
1832
                    		nameToBeFilled.setAppendedPhrase(nomNovStatus);
1833
                    	}else{
1834
	                    	try {
1835
	                            NomenclaturalStatusType statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1836
	                            nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1837
	                            synonym = Synonym.NewInstance(nameTBF, refMods);
1838
	                        } catch (UnknownCdmTypeException e) {
1839
	                            addProblematicStatusToFile(currentMyName.getStatus());
1840
	                            logger.warn("Problem with status");
1841
	                            synonym = Synonym.NewInstance(nameTBF, refMods);
1842
	                            synonym.setAppendedPhrase(currentMyName.getStatus());
1843
	                        }
1844
                    	}
1845
                    }else{
1846
                        synonym =  Synonym.NewInstance(nameTBF, refMods);
1847
                    }
1848

    
1849

    
1850
                    if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1851
                        setLSID(currentMyName.getIdentifier(), synonym);
1852
                    }
1853

    
1854
                    Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1855
                    //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1856
                    boolean synoExist = false;
1857
                    for (Synonym syn: synonymsSet){
1858
                        //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1859
                        boolean a =syn.getName().equals(synonym.getName());
1860
                        boolean b = syn.getSec().equals(synonym.getSec());
1861
                        if (a && b) {
1862
                            synoExist=true;
1863
                        }
1864
                    }
1865
                    if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1866
                        sourceHandler.addSource(refMods, synonym);
1867

    
1868
                        acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1869
                    }
1870
                }
1871
            }
1872
            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1873
        }
1874
    }
1875

    
1876

    
1877

    
1878
    /**
1879
     * @param identifier
1880
     * @param acceptedTaxon
1881
     */
1882
    @SuppressWarnings("rawtypes")
1883
    private void setLSID(String identifier, TaxonBase<?> taxon) {
1884
        //logger.info("setLSID");
1885
        //        boolean lsidok=false;
1886
        String id = identifier.split("__")[0];
1887
        String source = identifier.split("__")[1];
1888
        if (id.indexOf("lsid")>-1){
1889
            try {
1890
                LSID lsid = new LSID(id);
1891
                taxon.setLsid(lsid);
1892
                //                lsidok=true;
1893
            } catch (MalformedLSIDException e) {
1894
                logger.warn("Malformed LSID");
1895
            }
1896

    
1897
        }
1898

    
1899
        //logger.info("search reference for LSID");
1900
        //  if ((id.indexOf("lsid")<0) || !lsidok){
1901
        //ADD ORIGINAL SOURCE ID EVEN IF LSID
1902
        Reference re = null;
1903
        Pager<Reference> references = importer.getReferenceService().findByTitle(Reference.class, source, MatchMode.EXACT, null, 1, null, null, null);
1904
        if( references !=null && references.getCount()>0){
1905
            re=references.getRecords().get(0);
1906
        }
1907
        //logger.info("search reference for LSID-end");
1908
        if(re == null){
1909
            re = ReferenceFactory.newGeneric();
1910
            re.setTitleCache(source, true);
1911
            importer.getReferenceService().saveOrUpdate(re);
1912
        }
1913
        re=CdmBase.deproxy(re, Reference.class);
1914

    
1915
        //logger.info("search source for LSID");
1916
        Set<IdentifiableSource> sources = taxon.getSources();
1917
        boolean lsidinsource=false;
1918
        boolean urlinsource=false;
1919
        for (IdentifiableSource src:sources){
1920
            if (id.equalsIgnoreCase(src.getIdInSource()) && re.getTitleCache().equals(src.getCitation().getTitleCache())) {
1921
                lsidinsource=true;
1922
            }
1923
            if (src.getIdInSource() == null && re.getTitleCache().equals(sourceUrlRef.getTitleCache())) {
1924
                urlinsource=true;
1925
            }
1926
        }
1927
        if(!lsidinsource) {
1928
            taxon.addSource(OriginalSourceType.Import, id,null,re,null);
1929
        }
1930
        if(!urlinsource)
1931
        {
1932
            sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
1933
            taxon.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
1934
            // }
1935
        }
1936

    
1937
    }
1938

    
1939
    /**
1940
     * try to solve a parsing problem for a scientific name
1941
     * @param original : the name from the OCR document
1942
     * @param name : the tagged version
1943
     * @param parser
1944
     * @return the corrected TaxonNameBase
1945
     */
1946
    /*   @SuppressWarnings({ "unchecked", "rawtypes" })
1947
    private TaxonNameBase<?,?> solveNameProblem(String original, String name, INonViralNameParser parser, String author, Rank rank) {
1948
        Map<String,String> ato = namesMap.get(original);
1949
        if (ato == null) {
1950
            ato = namesMap.get(original+" "+author);
1951
        }
1952

    
1953

    
1954
        if (ato == null && rank.equals(Rank.UNKNOWN_RANK())){
1955
            rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
1956
        }
1957
        if (ato != null && rank.equals(Rank.UNKNOWN_RANK())){
1958
            rank = getRank(ato);
1959
        }
1960
        //        TaxonNameBase<?,?> nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1961
        TaxonNameBase<?,?> nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1962
        //                logger.info("RANK: "+rank);
1963
        int retry=0;
1964
        List<ParserProblem> problems = nameTBF.getParsingProblems();
1965
        for (ParserProblem pb:problems) {
1966
            System.out.println(pb.toString());
1967
        }
1968
        while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
1969
            addProblemNameToFile(name,author,nomenclaturalCode,rank);
1970
            String fullname=name;
1971
            if(! skippQuestion) {
1972
                fullname =  getFullReference(name,nameTBF.getParsingProblems());
1973
            }
1974
            if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1975
                nameTBF = TaxonNameFactory.NewBotanicalInstance(null);
1976
            }
1977
            if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1978
                nameTBF = ZoologicalName.NewInstance(null);
1979
            }
1980
            if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1981
                nameTBF= BacterialName.NewInstance(null);
1982
            }
1983
            parser.parseReferencedName(nameTBF, fullname, rank, false);
1984
            retry++;
1985
        }
1986
        if (retry == 1){
1987
            if(author != null){
1988
                if (name.indexOf(author)>-1) {
1989
                    nameTBF = parser.parseSimpleName(name.substring(0, name.indexOf(author)), nomenclaturalCode, rank);
1990
                } else {
1991
                    nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1992
                }
1993
                if (nameTBF.hasProblem()){
1994
                    if (name.indexOf(author)>-1) {
1995
                        addProblemNameToFile(name.substring(0, name.indexOf(author)),author,nomenclaturalCode,rank);
1996
                    } else {
1997
                        addProblemNameToFile(name,author,nomenclaturalCode,rank);
1998
                    }
1999
                    //                    System.out.println("TBF still has problems "+nameTBF.hasProblem());
2000
                    problems = nameTBF.getParsingProblems();
2001
                    for (ParserProblem pb:problems) {
2002
                        System.out.println(pb.toString());
2003
                    }
2004
                    nameTBF.setFullTitleCache(name, true);
2005
                }else{
2006
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2007
                        ((BotanicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2008
                    }
2009
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2010
                        ((ZoologicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2011
                    }
2012
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2013
                        ((BacterialName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2014
                    }
2015
                }
2016
                //                    logger.info("FULL TITLE CACHE "+name);
2017
            }else{
2018
                nameTBF.setFullTitleCache(name, true);
2019
            }
2020
        }
2021
        return nameTBF;
2022
    }
2023

    
2024
     */
2025

    
2026
    /**
2027
     * @param nomenclatureNode: the XML nodes
2028
     * @param nametosave: the list of objects to save into the CDM
2029
     * @param refMods: the current reference extracted from the MODS
2030
     * @return
2031
     */
2032
    @SuppressWarnings({ "rawtypes" })
2033
    private Taxon extractNomenclature(Node nomenclatureNode,  List<TaxonNameBase> nametosave, Reference refMods) throws ClassCastException{
2034
        refMods=CdmBase.deproxy(refMods, Reference.class);
2035

    
2036
        logger.info("extractNomenclature");
2037
        NodeList children = nomenclatureNode.getChildNodes();
2038
        String freetext="";
2039
        Taxon acceptedTaxon = null;
2040
        //   INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2041

    
2042
        //        String fullContent = nomenclatureNode.getTextContent();
2043

    
2044
        NomenclaturalStatusType statusType = null;
2045
        String newNameStatus = null;
2046
        //TODO
2047
        for (int i=0;i<children.getLength();i++){
2048
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:status")){
2049
                String status = children.item(i).getTextContent().trim();
2050

    
2051
                if (!status.isEmpty()){
2052
                	if (newNameStatus(status) != null){
2053
                		newNameStatus = newNameStatus(status);
2054
                    }else{
2055
	                    try {
2056
	                        statusType = nomStatusString2NomStatus(status);
2057
	                    } catch (UnknownCdmTypeException e) {
2058
	//                    	nomNovStatus;
2059
	                    	addProblematicStatusToFile(status);
2060
	                        logger.warn("Problem with status: " + status);
2061
	                    }
2062
                    }
2063
                }
2064
            }
2065
        }
2066

    
2067
        boolean containsSynonyms=false;
2068
        boolean wasSynonym = false;
2069
        usedFollowingTextPrefix = null;  //reset
2070

    
2071
        for (int i=0; i<children.getLength(); i++){
2072
        	Node childNode = children.item(i);
2073
        	String childName = childNode.getNodeName();
2074

    
2075

    
2076
        	//following text
2077
        	followingText = null;
2078
        	if ( i + 1 < children.getLength()){
2079
            	Node followingTextNode = children.item(i +1);
2080
            	if (followingTextNode.getNodeName().equals("#text") && !followingTextNode.getTextContent().matches("\\s*") ){
2081
            		followingText = followingTextNode.getTextContent();
2082
            	}
2083
        	}
2084

    
2085
        	//traverse nodes
2086
            if (childName.equalsIgnoreCase("#text")) {
2087
                freetext = childNode.getTextContent().trim();
2088
                if (usedFollowingTextPrefix != null && freetext.startsWith(usedFollowingTextPrefix)){
2089
                	freetext = freetext.substring(usedFollowingTextPrefix.length());
2090
                }
2091
                usedFollowingTextPrefix = null;  //reset
2092
            }else if (childName.equalsIgnoreCase("tax:collection_event")) {
2093
                //                System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
2094
                extractMaterialsDirect(childNode, acceptedTaxon, refMods, "collection", currentMyName.getTaxonNameBase());
2095
            }else if(childName.equalsIgnoreCase("tax:name")){
2096
            	NonViralName<?> nameToBeFilled;
2097
                //System.out.println("HANDLE FIRST NAME OF THE LIST");
2098
                if(!containsSynonyms){
2099
                	wasSynonym = false;
2100

    
2101
                	//System.out.println("I : "+i);
2102
                    currentMyName = new MyName(false);
2103
                    try {
2104
                        currentMyName = extractScientificName(childNode, refMods, followingText);
2105
                        treatmentMainName = currentMyName.getNewName();
2106
                        originalTreatmentName = currentMyName.getOriginalName();
2107

    
2108
                    } catch (TransformerFactoryConfigurationError e1) {
2109
                        throw new RuntimeException(e1);
2110
                    } catch (TransformerException e1) {
2111
                    	throw new RuntimeException(e1);
2112
                    }
2113

    
2114
                    if (currentMyName.getRank().equals(Rank.UNKNOWN_RANK()) || currentMyName.getRank().isLower(state2.getConfig().getMaxRank()) || currentMyName.getRank().equals(state2.getConfig().getMaxRank())){
2115
                        maxRankRespected=true;
2116

    
2117
                        nameToBeFilled=currentMyName.getTaxonNameBase();
2118

    
2119
                        //   acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
2120
                        acceptedTaxon=currentMyName.getTaxon();
2121
                        //System.out.println("TreatmentName "+treatmentMainName+" - "+acceptedTaxon);
2122

    
2123

    
2124
                        boolean statusMatch=false;
2125
                        if(acceptedTaxon !=null ){
2126
                            acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2127
                            statusMatch=compareStatus(acceptedTaxon, statusType);
2128
                            //System.out.println("statusMatch: "+statusMatch);
2129
                        }
2130
                        if (acceptedTaxon ==null || (acceptedTaxon != null && !statusMatch)){
2131

    
2132
                            nameToBeFilled=currentMyName.getTaxonNameBase();
2133
                            if (nameToBeFilled != null){
2134
                                if (!originalTreatmentName.isEmpty()) {
2135
                                    TaxonNameDescription td = TaxonNameDescription.NewInstance();
2136
                                    td.setTitleCache(originalTreatmentName, true);
2137
                                    nameToBeFilled.addDescription(td);
2138
                                }
2139

    
2140
                                if(statusType != null) {
2141
                                    nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
2142
                                }
2143
                                if(newNameStatus != null){
2144
                                	nameToBeFilled.setAppendedPhrase(newNameStatus);
2145
                                }
2146
                                sourceHandler.addSource(refMods, nameToBeFilled);
2147

    
2148
                                if (nameToBeFilled.getNomenclaturalReference() == null) {
2149
                                    acceptedTaxon= Taxon.NewInstance(nameToBeFilled,refMods);
2150
                                    //System.out.println("NEW ACCEPTED HERE "+nameToBeFilled);
2151
                                }
2152
                                else {
2153
                                    acceptedTaxon= Taxon.NewInstance(nameToBeFilled,(Reference) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
2154
                                    //System.out.println("NEW ACCEPTED HERE2 "+nameToBeFilled);
2155
                                }
2156

    
2157
                                sourceHandler.addSource(refMods, acceptedTaxon);
2158

    
2159
                                if(!state2.getConfig().doKeepOriginalSecundum()) {
2160
                                    acceptedTaxon.setSec(state2.getConfig().getSecundum());
2161
                                    //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2162
                                    //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2163
                                }
2164

    
2165
                                if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2166
                                    setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2167
                                }
2168

    
2169

    
2170
                                importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2171
                                acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2172
                            }
2173

    
2174
                        }else{
2175
                            acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2176
                            Set<IdentifiableSource> sources = acceptedTaxon.getSources();
2177
                            boolean sourcelinked=false;
2178
                            for (IdentifiableSource source:sources){
2179
                                if (source.getCitation().getTitleCache().equalsIgnoreCase(refMods.getTitleCache())) {
2180
                                    sourcelinked=true;
2181
                                }
2182
                            }
2183
                            if (!state2.getConfig().doKeepOriginalSecundum()) {
2184
                                acceptedTaxon.setSec(state2.getConfig().getSecundum());
2185
                                //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2186
                                //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2187
                            }
2188
                            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2189

    
2190
                            if (!sourcelinked){
2191
                                sourceHandler.addSource(refMods, acceptedTaxon);
2192
                            }
2193
                            if (!sourcelinked || !state2.getConfig().doKeepOriginalSecundum()){
2194

    
2195
                                if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2196
                                    //FIXME are these identifiers really related to taxa, not names? Exiting LSIDs come from Zoobank, urn:lsid:biosci.ohio-state.edu:osuc_concepts:134826 (Ants)
2197
                                	setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2198
                                }
2199
                                importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2200
                            }
2201
                        }
2202
                    }else{
2203
                        maxRankRespected=false;
2204
                    }
2205
                    containsSynonyms=true;  //all folowing names are handled as synonyms
2206
                }else{
2207
                    try{
2208
                        extractSynonyms(childNode, acceptedTaxon, refMods, followingText);
2209
                        wasSynonym = true;
2210

    
2211
                    }catch(NullPointerException e){
2212
                        logger.warn("null pointer exception, the accepted taxon might be null");
2213
                    }
2214
                }
2215
                containsSynonyms=true;
2216
            }else if (childName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
2217
                reloadClassification();
2218
                //extract the References within the document
2219
                extractReferences(childNode,nametosave,acceptedTaxon,refMods);
2220
            }else if (childName.equalsIgnoreCase("tax:bibref")){
2221
            	logger.warn(childName + " still preliminary");
2222

    
2223
            	NonViralName<?> currentName = currentMyName == null ? null : currentMyName.getTaxonNameBase();
2224
            	boolean handled = addFollowingTextToName (currentName, childNode.getTextContent() );
2225
            	if (! handled){
2226
            		setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2227
            	}
2228
            }else{
2229
            	logger.warn(childName + " not yet handled");
2230
            }
2231
            if(!stringIsEmpty(freetext.trim())) {;
2232
                if (! freetext.matches("\\d\\.?")){
2233
                	NonViralName<?> currentName = currentMyName == null ? null : currentMyName.getTaxonNameBase();
2234
                	boolean handled = false;
2235
                	if (currentName != null && !wasSynonym){
2236
                		handled = addFollowingTextToName (currentName, childNode.getTextContent() );
2237
                	}
2238
                	if (! handled){
2239
                		setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2240
                	}
2241
                }
2242

    
2243
                 freetext = "";
2244
            }
2245

    
2246
        }
2247
        //importer.getClassificationService().saveOrUpdate(classification);
2248
        return acceptedTaxon;
2249
    }
2250

    
2251

    
2252

    
2253

    
2254
	/**
2255
     * @return
2256
     */
2257

    
2258
    private boolean compareStatus(TaxonBase<?> t, NomenclaturalStatusType statusType) {
2259
        //logger.info("compareStatus");
2260
        boolean statusMatch=false;
2261
        //found one taxon
2262
        Set<NomenclaturalStatus> status = t.getName().getStatus();
2263
        if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
2264
            for (NomenclaturalStatus st:status){
2265
                NomenclaturalStatusType stype = st.getType();
2266
                if (stype.toString().equalsIgnoreCase(statusType.toString())) {
2267
                    statusMatch=true;
2268
                }
2269
            }
2270
        }
2271
        else{
2272
            if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
2273
                statusMatch=true;
2274
            }
2275
        }
2276
        return statusMatch;
2277
    }
2278

    
2279
    /**
2280
     * @param acceptedTaxon: the current acceptedTaxon
2281
     * @param ref: the current reference extracted from the MODS
2282
     * @return the parent for the current accepted taxon
2283
     */
2284
    /*  private Taxon createParent(Taxon acceptedTaxon, Reference ref) {
2285
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2286

    
2287
        List<Rank> rankList = new ArrayList<Rank>();
2288
        rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
2289

    
2290
        List<String> rankListStr = new ArrayList<String>();
2291
        for (Rank r:rankList) {
2292
            rankListStr.add(r.toString());
2293
        }
2294
        String r="";
2295
        String s = acceptedTaxon.getTitleCache();
2296
        Taxon tax = null;
2297
        if(!skippQuestion){
2298
            int addTaxon = askAddParent(s);
2299
            logger.info("ADD TAXON: "+addTaxon);
2300
            if (addTaxon == 0 ){
2301
                Taxon tmp = askParent(acceptedTaxon, classification);
2302
                if (tmp == null){
2303
                    s = askSetParent(s);
2304
                    r = askRank(s,rankListStr);
2305

    
2306
                    TaxonNameBase<?,?> nameToBeFilled = null;
2307
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2308
                        nameToBeFilled = TaxonNameFactory.NewBotanicalInstance(null);
2309
                    }
2310
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2311
                        nameToBeFilled = ZoologicalName.NewInstance(null);
2312
                    }
2313
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2314
                        nameToBeFilled = BacterialName.NewInstance(null);
2315
                    }
2316
                    nameToBeFilled.setTitleCache(s, true);
2317
                    nameToBeFilled.setRank(getRank(r), true);
2318

    
2319
                    tax = Taxon.NewInstance(nameToBeFilled, ref);
2320
                }
2321
                else{
2322
                    tax=tmp;
2323
                }
2324

    
2325
                createParent(tax, ref);
2326
                //            logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
2327
                classification.addParentChild(tax, acceptedTaxon, ref, null);
2328
            }
2329
            else{
2330
                classification.addChildTaxon(acceptedTaxon, ref, null);
2331
                tax=acceptedTaxon;
2332
            }
2333
        } else{
2334
            classification.addChildTaxon(acceptedTaxon, ref, null);
2335
            tax=acceptedTaxon;
2336
        }
2337
        //        logger.info("RETURN: "+tax );
2338
        return tax;
2339

    
2340
    }
2341

    
2342
     */
2343

    
2344

    
2345
    private MyName  extractScientificNameSynonym(Node name, Reference refMods, String followingText) throws TransformerFactoryConfigurationError, TransformerException {
2346
        //System.out.println("extractScientificNameSynonym");
2347
        logger.info("extractScientificNameSynonym");
2348
        String[] rankListToPrint_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2349
        List<String> rankListToPrint = new ArrayList<String>();
2350
        for (String r : rankListToPrint_tmp) {
2351
            rankListToPrint.add(r.toLowerCase());
2352
        }
2353

    
2354
        Rank rank = Rank.UNKNOWN_RANK();
2355
        NodeList children = name.getChildNodes();
2356
        String originalName="";
2357
        String fullName = "";
2358
        String newName="";
2359
        String identifier="";
2360
        HashMap<String, String> atomisedMap = new HashMap<String, String>();
2361
        List<String> atomisedName= new ArrayList<String>();
2362

    
2363
        String rankStr = "";
2364
        Rank tmpRank ;
2365

    
2366
        String status= extractStatus(children);
2367

    
2368
        for (int i=0;i<children.getLength();i++){
2369
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:xmldata")){
2370
                NodeList atom = children.item(i).getChildNodes();
2371
                for (int k=0;k<atom.getLength();k++){
2372
                    identifier = extractIdentifier(identifier, atom.item(k));
2373
                    tmpRank = null;
2374
                    rankStr = atom.item(k).getNodeName().toLowerCase();
2375
                    //                    logger.info("RANKSTR:*"+rankStr+"*");
2376
                    if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2377
                        rankStr=atom.item(k).getTextContent().trim();
2378
                        tmpRank = getRank(rankStr);
2379
                    }
2380
                    //                    if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2381
                    if (tmpRank != null){
2382
                        rank=tmpRank;
2383
                    }
2384
                    atomisedMap.put(rankStr.toLowerCase(),atom.item(k).getTextContent().trim());
2385
                }
2386
                addAtomisedNamesToMap(rankListToPrint, rank, atomisedName, atom);
2387
            }
2388
            if(children.item(i).getNodeName().equalsIgnoreCase("#text") && !StringUtils.isBlank(children.item(i).getTextContent())){
2389
                //                logger.info("name non atomised: "+children.item(i).getTextContent());
2390
                fullName = children.item(i).getTextContent().trim();
2391
                //                logger.info("fullname: "+fullName);
2392
            }
2393
        }
2394
        originalName=fullName;
2395
        fullName = cleanName(fullName, atomisedName);
2396
        namesMap.put(fullName,atomisedMap);
2397

    
2398
        String atomisedNameStr = getAtomisedNameStr(atomisedName);
2399

    
2400
        if (fullName != null){
2401
            //            System.out.println("fullname: "+fullName);
2402
            //            System.out.println("atomised: "+atomisedNameStr);
2403
            if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2404
                if (skippQuestion){
2405
                    //                    String defaultN = "";
2406
                    if (atomisedNameStr.length()>fullName.length()) {
2407
                        newName=atomisedNameStr;
2408
                    } else {
2409
                        if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2410
                            newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2411
                        } else {
2412
                            newName=fullName;
2413
                        }
2414
                    }
2415
                } else {
2416
                    newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2417
                }
2418
            } else {
2419
                newName=fullName;
2420
            }
2421
        }
2422
        //not really needed
2423
        //        rank = askForRank(newName, rank, nomenclaturalCode);
2424
        //        System.out.println("atomised: "+atomisedMap.toString());
2425

    
2426
        //        String[] names = new String[5];
2427
        MyName myname = new MyName(true);
2428

    
2429
        //System.out.println("Handle "+newName+ "(rank: "+rank+")");
2430
        //        System.out.println(atomisedMap.keySet());
2431
        fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2432
        myname.setOriginalName(fullName);
2433
        myname.setNewName(newName);
2434
        myname.setRank(rank);
2435
        myname.setIdentifier(identifier);
2436
        myname.setStatus(status);
2437
        myname.setSource(refMods);
2438

    
2439
        //        boolean higherAdded=false;
2440

    
2441

    
2442
        boolean parseNameManually=false;
2443
        INonViralNameParser<?> parser = NonViralNameParserImpl.NewInstance();
2444
        ITaxonNameBase  nameToBeFilledTest ;
2445

    
2446
        //if selected the atomised version
2447
        if(newName==atomisedNameStr){
2448
            nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2449
            if (nameToBeFilledTest.hasProblem()){
2450
                addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2451
                nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode, rank);
2452
                if (nameToBeFilledTest.hasProblem()){
2453
                    addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2454
                    parseNameManually=true;
2455
                }
2456
            }
2457
        }else{
2458
            nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2459
            if (nameToBeFilledTest.hasProblem()){
2460
                addProblemNameToFile("fullversion",fullName, nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2461
                nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2462
                parseNameManually=true;
2463
                if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2464
                    addNameDifferenceToFile(originalName,atomisedNameStr);
2465
                }
2466
            }
2467
        }
2468

    
2469
        if(parseNameManually){
2470
            //System.out.println("DO IT MANUALLY");
2471
        	if (this.state2.getConfig().isUseOldUnparsedSynonymExtraction()){
2472
                createUnparsedSynonym(rank, newName, atomisedMap, myname);
2473
        	}else{
2474
        		createUnparsedSynonymNew(rank, newName, atomisedMap, myname, refMods);;
2475
        	}
2476
        } else{
2477
            //System.out.println("AUTOMATIC!");
2478
            //            createAtomisedTaxonString(newName, atomisedMap, myname);
2479
            myname.setParsedName(nameToBeFilledTest);
2480
            myname.buildTaxon();
2481
        }
2482
        //System.out.println("RETURN SYNONYM "+myname.getSyno().toString());
2483
        return myname;
2484
    }
2485

    
2486

    
2487
	/**
2488
     * @param name
2489
     * @throws TransformerFactoryConfigurationError
2490
     * @throws TransformerException
2491
     * @return a list of possible names
2492
     */
2493
    @SuppressWarnings({"rawtypes" })
2494
    private MyName extractScientificName(Node name, Reference refMods, String followingText) throws TransformerFactoryConfigurationError, TransformerException {
2495
        logger.info("extractScientificName");
2496

    
2497
        String[] rankListToPrintLowerCase_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2498
        List<String> rankListToPrint = Arrays.asList(rankListToPrintLowerCase_tmp);
2499

    
2500
        Rank rank = Rank.UNKNOWN_RANK();
2501
        NodeList children = name.getChildNodes();
2502
        String originalName = "";
2503
        String fullName = "";
2504
        String newName = "";
2505
        String identifier = "";
2506
        HashMap<String, String> atomisedMap = new HashMap<String, String>();
2507
        List<String> atomisedNameList= new ArrayList<String>();
2508

    
2509
        String status= extractStatus(children);
2510

    
2511
        for (int i=0;i<children.getLength();i++){
2512
        	Node nameChild = children.item(i);
2513
            if(nameChild.getNodeName().equalsIgnoreCase("tax:xmldata")){
2514
                NodeList xmlDataChildren = nameChild.getChildNodes();
2515
                for (int k=0;k<xmlDataChildren.getLength();k++){
2516
                	Node xmlDataChild = xmlDataChildren.item(k);
2517
                    identifier = extractIdentifier(identifier, xmlDataChild);
2518
                    String rankStr = xmlDataChild.getNodeName().toLowerCase();
2519
                    if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2520
                        rankStr=xmlDataChild.getTextContent().trim();
2521
                        Rank tmpRank = getRank(rankStr);
2522
                        if (tmpRank != null){
2523
                            rank=tmpRank;
2524
                        }
2525
                    }
2526
                    //                    if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2527

    
2528
                    atomisedMap.put(rankStr.toLowerCase(),xmlDataChild.getTextContent().trim());
2529
                }
2530
                addAtomisedNamesToMap(rankListToPrint, rank, atomisedNameList, xmlDataChildren);
2531
            }
2532
            else if(nameChild.getNodeName().equalsIgnoreCase("#text") && ! nameChild.getTextContent().matches("\\s*")){
2533
                //                logger.info("name non atomised: "+children.item(i).getTextContent());
2534
                fullName = nameChild.getTextContent().trim();
2535
                //                logger.info("fullname: "+fullName);
2536
            }
2537
        }
2538
        originalName=fullName;
2539
        fullName = cleanName(fullName, atomisedNameList);
2540
        namesMap.put(fullName,atomisedMap);
2541

    
2542
        String atomisedNameStr = getAtomisedNameStr(atomisedNameList);
2543

    
2544
        if (fullName != null){
2545
            if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2546
                if (skippQuestion){
2547
                    if (atomisedNameStr.length()>fullName.length()) {
2548
                        newName = atomisedNameStr;
2549
                    } else {
2550
                        if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2551
                            newName = askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
2552
                        } else {
2553
                            newName = fullName;
2554
                        }
2555
                    }
2556
                } else {
2557
                    newName=askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
2558
                }
2559
            } else {
2560
                newName=fullName;
2561
            }
2562
        }
2563
        //not really needed
2564
        //        rank = askForRank(newName, rank, nomenclaturalCode);
2565
        //        System.out.println("atomised: "+atomisedMap.toString());
2566

    
2567
        //        String[] names = new String[5];
2568
        MyName myname = new MyName(false);
2569

    
2570
        //System.out.println("\n\nBUILD "+newName+ "(rank: "+rank+")");
2571
        //        System.out.println(atomisedMap.keySet());
2572
        fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2573
        myname.setOriginalName(fullName);
2574
        myname.setNewName(newName);
2575

    
2576
        myname.setRank(rank);
2577
        myname.setIdentifier(identifier);
2578
        myname.setStatus(status);
2579
        myname.setSource(refMods);
2580

    
2581
        //        boolean higherAdded=false;
2582

    
2583

    
2584
        boolean parseNameManually=false;
2585
        INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2586
        ITaxonNameBase  nameToBeFilledTest = null;
2587

    
2588
        //if selected the atomised version
2589
        if(newName==atomisedNameStr){
2590
            nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2591
            if (nameToBeFilledTest.hasProblem()){
2592
        	    addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2593
                nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2594
                if (nameToBeFilledTest.hasProblem()){
2595
                    addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2596
                    parseNameManually=true;
2597
                }
2598
            }
2599
        }else{
2600
            nameToBeFilledTest = parseWithExtension(parser, fullName , rank, followingText, atomisedMap);
2601
            if (nameToBeFilledTest.hasProblem()){
2602
                addProblemNameToFile("fullversion",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2603
                nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2604
                parseNameManually=true;
2605
                if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2606
                    addNameDifferenceToFile(originalName,atomisedNameStr);
2607
                }
2608
            }
2609
        }
2610

    
2611
        //System.out.println("parseNameManually: "+parseNameManually);
2612
        if(parseNameManually){
2613
            createAtomisedTaxon(rank, newName, atomisedMap, myname);
2614
        }
2615
        else{
2616
            createAtomisedTaxonString(newName, atomisedMap, myname);
2617
            myname.setParsedName(nameToBeFilledTest);
2618
            //TODO correct handling of createIfNotExists
2619
           	myname.buildTaxon();
2620
        }
2621
        return myname;
2622

    
2623
    }
2624

    
2625
    private ITaxonNameBase parseWithExtension(INonViralNameParser parser, String atomisedNameStr, Rank rank, String followingText, HashMap<String, String> atomisedMap) {
2626
    	Object[] nameExtensionResult = getPossibleExtension(followingText, atomisedMap, nomenclaturalCode);
2627

    
2628
    	ITaxonNameBase name = parser.parseFullName(atomisedNameStr, nomenclaturalCode, rank);
2629
    	if (nameExtensionResult != null && nameExtensionResult[0] != null){
2630
    		String ext = (String)nameExtensionResult[0];
2631
    		ITaxonNameBase extName =parser.parseFullName(atomisedNameStr + " " + ext, nomenclaturalCode, rank);
2632
    		if (! extName.hasProblem()){
2633
    			name = extName;
2634
    			this.usedFollowingTextPrefix = ext;
2635
    			//TODO do we need to fill the atomisedMap at all?
2636
    			if ((Boolean)(nameExtensionResult[1])){
2637
    				//TODO
2638
    			}
2639
    			if ((Boolean)(nameExtensionResult[2])){
2640
    				//TODO BasionymYear etc.
2641
    				Integer origYear = ((ZoologicalName)name).getPublicationYear();
2642
    				if (origYear != null){
2643
        				atomisedMap.put(PUBLICATION_YEAR, origYear.toString());
2644
    				}
2645
    			}
2646
    		}
2647
    	}
2648
		return name;
2649
	}
2650

    
2651
	private Object[] getPossibleExtension(String followingText, HashMap<String, String> atomisedMap, NomenclaturalCode nomenclaturalCode) {
2652
		if (StringUtils.isBlank(followingText)){
2653
			return null;
2654
		}
2655

    
2656
    	boolean includeAuthor = true;
2657
    	boolean includeYear = false;
2658
		if (atomisedMap.containsKey("dwc:scientificnameauthorship")){
2659
			includeAuthor = false;
2660
		}
2661
    	if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2662
    		includeYear = true;
2663
    	}
2664
    	String patternStr = "";
2665
    	if (includeAuthor){
2666
    		patternStr += NonViralNameParserImplRegExBase.capitalWord;
2667
    	}
2668
    	if (includeYear){
2669
    		patternStr += "\\s*(,|\\s+)\\s*" + "(17|18|19|20)" + "\\d{2}" ;
2670
    	}
2671
    	String match = null;
2672
    	if (! patternStr.isEmpty()){
2673
    		Pattern pattern = Pattern.compile("^" + patternStr);
2674
    		Matcher matcher = pattern.matcher(followingText.trim());
2675
    		if (matcher.find()){
2676
    			match = matcher.group();
2677
    		}
2678
    	}
2679

    
2680
		return new Object[]{match, includeAuthor, includeYear};
2681
	}
2682

    
2683
	/**
2684
     * @param atomisedName
2685
     * @return
2686
     */
2687
    private String getAtomisedNameStr(List<String> atomisedName) {
2688
        //logger.info("getAtomisedNameStr");
2689
        String atomisedNameStr = StringUtils.join(atomisedName," ");
2690
        while(atomisedNameStr.contains("  ")) {
2691
            atomisedNameStr=atomisedNameStr.replace("  ", " ");
2692
        }
2693
        atomisedNameStr=atomisedNameStr.trim();
2694
        return atomisedNameStr;
2695
    }
2696

    
2697
    /**
2698
     * @param children
2699
     * @param status
2700
     * @return
2701
     */
2702
    private String extractStatus(NodeList children) {
2703
        logger.info("extractStatus");
2704
        String status="";
2705
        for (int i=0;i<children.getLength();i++){
2706
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:status") ||
2707
                    (children.item(i).getNodeName().equalsIgnoreCase("tax:namePart") &&
2708
                            children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("status"))){
2709
                status = children.item(i).getTextContent().trim();
2710
            }
2711
        }
2712
        return status;
2713
    }
2714

    
2715
    /**
2716
     * @param identifier
2717
     * @param atom
2718
     * @param k
2719
     * @return
2720
     */
2721
    private String extractIdentifier(String identifier, Node atom) {
2722
        //logger.info("extractIdentifier");
2723
        if (atom.getNodeName().equalsIgnoreCase("tax:xid")){
2724
            try{
2725
                identifier = atom.getAttributes().getNamedItem("identifier").getNodeValue();
2726
            }catch(Exception e){
2727
                System.out.println("pb with identifier, maybe empty");
2728
            }
2729
            try{
2730
                identifier+="__"+atom.getAttributes().getNamedItem("source").getNodeValue();
2731
            }catch(Exception e){
2732
                System.out.println("pb with identifier, maybe empty");
2733
            }
2734
        }
2735
        return identifier;
2736
    }
2737

    
2738
    /**
2739
     * @param rankListToPrint
2740
     * @param rank
2741
     * @param atomisedName
2742
     * @param atom
2743
     */
2744
    private void addAtomisedNamesToMap(List<String> rankListToPrint, Rank rank, List<String> atomisedName, NodeList atom) {
2745
        logger.info("addAtomisedNamesToMap");
2746
        for (int k=0;k<atom.getLength();k++){
2747
        	Node node = atom.item(k);
2748
        	String nodeName = node.getNodeName();
2749
            if (! nodeName.equalsIgnoreCase("dwc:taxonRank") ) {  //rank has been handled in higher method
2750
                if (nodeName.equalsIgnoreCase("dwc:subgenus") || nodeName.equalsIgnoreCase("dwcranks:subgenus")) {
2751
                    atomisedName.add("("+ node.getTextContent().trim()+")");
2752
                } else if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet") || nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2753
                       	if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet")){
2754
                            atomisedName.add("var. "+node.getTextContent().trim());
2755
                        }else if(nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2756
                            atomisedName.add("subsp. "+atom.item(k).getTextContent().trim());
2757
                        }
2758
                } else if(rankListToPrint.contains(nodeName.toLowerCase())) {
2759
                    atomisedName.add(node.getTextContent().trim());
2760
                } else{
2761
                    if (rank.isHigher(Rank.GENUS()) && (nodeName.indexOf("dwcranks:")>-1 || nodeName.indexOf("dwc:Family")>-1)) {
2762
                        atomisedName.add(node.getTextContent().trim());
2763
                    }else if (nodeName.equals("#text")){
2764
                    	String text = node.getTextContent();
2765
                    	if (StringUtils.isNotBlank(text)){
2766
                    		//TODO handle text
2767
                    		logger.warn("name xmldata contains text. This is unhandled");
2768
                    	}
2769
                    }else if (nodeName.matches("(?i)(dwc:Kingdom|dwc:Class|dwc:Order|dwc:Family)")){
2770
                    	//we currently do not use higher ranks information
2771
                    }else{
2772
                    	//TODO handle unhandled node
2773
                    	logger.warn("Unhandled node: " + nodeName);
2774
                    }
2775
                }
2776
            }
2777
        }
2778
    }
2779

    
2780
    /**
2781
     * @param fullName
2782
     * @param atomisedName
2783
     * @return
2784
     */
2785
    private String cleanName(String name, List<String> atomisedName) {
2786
        //logger.info("cleanName");
2787
        String fullName =name;
2788
        if (fullName != null){
2789
            fullName = fullName.replace("( ", "(");
2790
            fullName = fullName.replace(" )",")");
2791

    
2792
            if (fullName.trim().isEmpty()){
2793
                fullName=StringUtils.join(atomisedName," ");
2794
            }
2795

    
2796
            while(fullName.contains("  ")) {
2797
                fullName=fullName.replace("  ", " ");
2798
                //            logger.info("while");
2799
            }
2800
            fullName=fullName.trim();
2801
        }
2802
        return fullName;
2803
    }
2804

    
2805
    /**
2806
     * @param rank
2807
     * @param fullName
2808
     * @param atomisedMap
2809
     * @param myname
2810
     * @return
2811
     */
2812
    private String extractAuthorFromNames(Rank rank, String name, HashMap<String, String> atomisedMap, MyName myname) {
2813
        logger.info("extractAuthorFromNames");
2814
        String fullName=name;
2815
        if (atomisedMap.get("dwc:scientificnameauthorship") == null && fullName!=null){
2816
            //            System.out.println("rank : "+rank.toString());
2817
            if(rank.isHigher(Rank.SPECIES())){
2818
                try{
2819
                    String author=null;
2820
                    if(atomisedMap.get("dwcranks:subgenus") != null) {
2821
                        author = fullName.split(atomisedMap.get("dwcranks:subgenus"))[1].trim();
2822
                    }
2823
                    if(atomisedMap.get("dwc:subgenus") != null) {
2824
                        author = fullName.split(atomisedMap.get("dwc:subgenus"))[1].trim();
2825
                    }
2826
                    if(author == null) {
2827
                        if(atomisedMap.get("dwc:genus") != null) {
2828
                            author = fullName.split(atomisedMap.get("dwc:genus"))[1].trim();
2829
                        }
2830
                    }
2831
                    if(author != null){
2832
                        fullName = fullName.substring(0, fullName.indexOf(author));
2833
                        author=author.replaceAll(",","").trim();
2834
                        myname.setAuthor(author);
2835
                    }
2836
                }catch(Exception e){
2837
                    //could not extract the author
2838
                }
2839
            }
2840
            if(rank.equals(Rank.SPECIES())){
2841
                try{
2842
                    String author=null;
2843
                    if(author == null) {
2844
                        if(atomisedMap.get("dwc:species") != null) {
2845
                            String[] t = fullName.split(atomisedMap.get("dwc:species"));
2846
                            //                            System.out.println("NB ELEMENTS "+t.length +"fullName "+fullName+", "+atomisedMap.get("dwc:species"));
2847
                            author = fullName.split(atomisedMap.get("dwc:species"))[1].trim();
2848
                            //                            System.out.println("AUTEUR "+author);
2849
                        }
2850
                    }
2851
                    if(author != null){
2852
                        fullName = fullName.substring(0, fullName.indexOf(author));
2853
                        author=author.replaceAll(",","").trim();
2854
                        myname.setAuthor(author);
2855
                    }
2856
                }catch(Exception e){
2857
                    //could not extract the author
2858
                }
2859
            }
2860
        }else{
2861
            myname.setAuthor(atomisedMap.get("dwc:scientificnameauthorship"));
2862
        }
2863
        return fullName;
2864
    }
2865

    
2866
    /**
2867
     * @param newName
2868
     * @param atomisedMap
2869
     * @param myname
2870
     */
2871
    private void createAtomisedTaxonString(String newName, HashMap<String, String> atomisedMap, MyName myname) {
2872
        logger.info("createAtomisedTaxonString "+atomisedMap);
2873
        if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
2874
            myname.setFamilyStr(atomisedMap.get("dwc:family"));
2875
        }
2876
        if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY())){
2877
            myname.setSubfamilyStr(atomisedMap.get("dwcranks:subfamily"));
2878
        }
2879
        if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
2880
            myname.setTribeStr(atomisedMap.get("dwcranks:tribe"));
2881
        }
2882
        if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
2883
            myname.setSubtribeStr(atomisedMap.get("dwcranks:subtribe"));
2884
        }
2885
        if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
2886
            myname.setGenusStr(atomisedMap.get("dwc:genus"));
2887
        }
2888
        if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2889
            myname.setSubgenusStr(atomisedMap.get("dwcranks:subgenus"));
2890
        }
2891
        if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2892
            myname.setSubgenusStr(atomisedMap.get("dwc:subgenus"));
2893
        }
2894
        if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
2895
            String n=newName;
2896
            if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2897
                n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2898
                n=n.replace("subsp.","");
2899
            }
2900
            if(atomisedMap.get("dwc:subspecies") != null) {
2901
                n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2902
                n=n.replace("subsp.","");
2903
            }
2904
            if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2905
                n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2906
                n=n.replace("var.","");
2907
                n=n.replace("v.","");
2908
            }
2909
            if(atomisedMap.get("dwcranks:formepithet") != null) {
2910
                //TODO
2911
                System.out.println("TODO FORMA");
2912
                n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
2913
                n=n.replace("forma","");
2914
            }
2915
            n=n.trim();
2916
            String author = myname.getAuthor();
2917
            if(n.split(" ").length>2){
2918

    
2919
                String n2=n.split(" ")[0]+" "+n.split(" ")[1];
2920
                String a= "";
2921
                try{
2922
                    a=n.split(n2)[1].trim();
2923
                }catch(Exception e){
2924
                    logger.info("no author in "+n+"?");}
2925

    
2926
                myname.setAuthor(a);
2927
                //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
2928
                n=n2;
2929

    
2930
            }
2931

    
2932
            myname.setSpeciesStr(atomisedMap.get("dwc:species"));
2933
            myname.setAuthor(author);
2934
        }
2935
        if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
2936
            myname.setSubspeciesStr(atomisedMap.get("dwc:subspecies"));
2937
        }
2938
        if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
2939
            myname.setSubspeciesStr(atomisedMap.get("dwc:infraspecificepithet"));
2940
        }
2941
        if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
2942
            myname.setVarietyStr(atomisedMap.get("dwcranks:varietyepithet"));
2943
        }
2944
        if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
2945
            myname.setFormStr(atomisedMap.get("dwcranks:formepithet"));
2946
        }
2947
        if (atomisedMap.get(PUBLICATION_YEAR) != null){
2948
        	myname.setPublicationYear(Integer.valueOf(atomisedMap.get(PUBLICATION_YEAR)));
2949
        }
2950
    }
2951

    
2952
    /**
2953
     * @see #createUnparsedSynonymNew(Rank, String, HashMap, MyName)
2954
     * @param rank
2955
     * @param newName
2956
     * @param atomisedMap
2957
     * @param myname
2958
     */
2959
    private void createUnparsedSynonym(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
2960
        logger.info("createSynonym");
2961
        //System.out.println("createsynonym");
2962
        if(rank.equals(Rank.UNKNOWN_RANK())){
2963
            myname.setNotParsableTaxon(newName);
2964
        }else{
2965
	        if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY()) && rank.equals(Rank.FAMILY())){
2966
	            myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
2967
	        }
2968
	        if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY()) && rank.equals(Rank.SUBFAMILY())){
2969
	            myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
2970
	        }
2971
	        if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE()) && rank.equals(Rank.TRIBE())){
2972
	            myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
2973
	        }
2974
	        if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE()) && rank.equals(Rank.SUBTRIBE())){
2975
	            myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
2976
	        }
2977
	        if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS()) && rank.equals(Rank.GENUS())){
2978
	            myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
2979
	        }
2980
	        if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
2981
	            myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
2982
	        }
2983
	        if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
2984
	            myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
2985
	        }
2986
	        if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES()) && rank.equals(Rank.SPECIES())){
2987
	            String n=newName;
2988
	            if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2989
	                n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2990
	                n=n.replace("subsp.","");
2991
	            }
2992
	            if(atomisedMap.get("dwc:subspecies") != null) {
2993
	                n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2994
	                n=n.replace("subsp.","");
2995
	            }
2996
	            if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2997
	                n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2998
	                n=n.replace("var.","");
2999
	                n=n.replace("v.","");
3000
	            }
3001
	            if(atomisedMap.get("dwcranks:formepithet") != null) {
3002
	                //TODO
3003
	                //System.out.println("TODO FORMA");
3004
	                n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3005
	                n=n.replace("forma","");
3006
	            }
3007
	            n=n.trim();
3008
	            String author = myname.getAuthor();
3009
	            if(n.split(" ").length>2){
3010

    
3011
	                String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3012
	                String a="";
3013
	                try{
3014
	                    a= n.split(n2)[1].trim();
3015
	                }catch(Exception e){logger.info("no author in "+n);}
3016
	                myname.setAuthor(a);
3017
	                //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3018
	                n=n2;
3019

    
3020
	            }
3021
	            Taxon species = myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank);
3022
	            myname.setSpecies(species);
3023
	            myname.setAuthor(author);
3024
	        }
3025
	        if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3026
	            myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3027
	        }
3028
	        if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3029
	            myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3030
	        }
3031
	        if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY()) && rank.equals(Rank.VARIETY())){
3032
	            myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3033
	        }
3034
	        if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM()) && rank.equals(Rank.FORM())){
3035
	            myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3036
	        }
3037
        }
3038

    
3039
    }
3040

    
3041

    
3042
    /**
3043
     * @param refMods
3044
     * @see #createUnparsedSynonym(Rank, String, HashMap, MyName)
3045
     * the original TaxonXImport extracted Synonyms by creating acc Taxa with partial names
3046
     * I (AM) do not understand this but don't want to destroy code which maybe works in some cases) there
3047
     * I created this switch for old
3048
     * for Spiders the new version is preferred
3049
     */
3050
    private void createUnparsedSynonymNew(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname, Reference refMods) {
3051
        logger.info("createSynonym");
3052

    
3053
        NonViralName<?> nameToBeFilled = this.getNonViralNameAccNomenclature();
3054
        //System.out.println("createsynonym");
3055
        if(rank.equals(Rank.UNKNOWN_RANK())){
3056
            //TODO
3057
        	myname.setNotParsableTaxon(newName);
3058

    
3059
        	nameToBeFilled.setTitleCache(newName, true);
3060
        }else{
3061
        	if(atomisedMap.get("dwc:genus") != null ){
3062
    			nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwc:genus"));
3063
	        }
3064
        	if (rank.isSupraGeneric()){
3065
        		if (atomisedMap.get("dwcranks:subtribe") != null ){
3066
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subtribe"));
3067
    	        }else if (atomisedMap.get("dwcranks:subtribe") != null ){
3068
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subtribe"));
3069
    	        }else if (atomisedMap.get("dwcranks:tribe") != null ){
3070
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:tribe"));
3071
    	        }else if (atomisedMap.get("dwcranks:subfamily") != null ){
3072
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subfamily"));
3073
    	        }else if (atomisedMap.get("dwc:family") != null ){
3074
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwc:family"));
3075
        	    }else{
3076
        	    	logger.warn("Supra generic rank not yet handled or atomisation not available");
3077
        	    }
3078
        	}
3079
        	if (atomisedMap.get("dwcranks:subgenus") != null){
3080
        		nameToBeFilled.setInfraGenericEpithet(atomisedMap.get("dwcranks:subgenus"));
3081
        	}
3082
        	if (atomisedMap.get("dwc:subgenus") != null){
3083
        		nameToBeFilled.setInfraGenericEpithet(atomisedMap.get("dwc:subgenus"));
3084
        	}
3085
        	if (atomisedMap.get("dwc:species") != null){
3086
        		nameToBeFilled.setSpecificEpithet(atomisedMap.get("dwc:species"));
3087
        	}
3088
        	if (atomisedMap.get("dwcranks:formepithet") != null){
3089
        		nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwcranks:formepithet"));
3090
        	}else if (atomisedMap.get("dwcranks:varietyepithet") != null){
3091
        		nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwcranks:varietyepithet"));
3092
        	}else if (atomisedMap.get("dwc:infraspecificepithet") != null){
3093
        		nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwc:infraspecificepithet"));
3094
        	}else if (atomisedMap.get("dwc:subspecies") != null){
3095
        		nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwc:subspecies"));
3096
        	}
3097
            Reference sec = sourceUrlRef;
3098
            if(!state2.getConfig().doKeepOriginalSecundum()){
3099
                sec = state2.getConfig().getSecundum();
3100
            }
3101
        	Synonym syn = Synonym.NewInstance(nameToBeFilled, sec);
3102
//        	sourceHandler.addSource(refMods, syn);
3103
        	myname.setSyno(syn);
3104
        	myname.setSynonym(true);
3105
        }
3106
	}
3107

    
3108
    /**
3109
     * @param rank
3110
     * @param newName
3111
     * @param atomisedMap
3112
     * @param myname
3113
     */
3114
    private void createAtomisedTaxon(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
3115
        logger.info("createAtomisedTaxon "+atomisedMap);
3116
        if(rank.equals(Rank.UNKNOWN_RANK())){
3117
            myname.setNotParsableTaxon(newName);
3118
        }
3119
        else{
3120
            if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
3121
                myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
3122
            }
3123
            if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY())){
3124
                myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
3125
            }
3126
            if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
3127
                myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
3128
            }
3129
            if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
3130
                myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
3131
            }
3132
            if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
3133
                myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
3134
            }
3135
            if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3136
                myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
3137
            }
3138
            if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3139
                myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
3140
            }
3141
            if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
3142
                String n=newName;
3143
                if(atomisedMap.get("dwc:infraspecificepithet") != null) {
3144
                    n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
3145
                    n=n.replace("subsp.","");
3146
                }
3147
                if(atomisedMap.get("dwc:subspecies") != null) {
3148
                    n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
3149
                    n=n.replace("subsp.","");
3150
                }
3151
                if(atomisedMap.get("dwcranks:varietyepithet") != null) {
3152
                    n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
3153
                    n=n.replace("var.","");
3154
                    n=n.replace("v.","");
3155
                }
3156
                if(atomisedMap.get("dwcranks:formepithet") != null) {
3157
                    //TODO
3158
                    //System.out.println("TODO FORMA");
3159
                    n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3160
                    n=n.replace("forma","");
3161
                }
3162
                n=n.trim();
3163
                String author = myname.getAuthor();
3164
                if(n.split(" ").length>2){
3165
                    String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3166
                    String a="";
3167
                    try{
3168
                        a= n.split(n2)[1].trim();
3169
                    }catch(Exception e){logger.info("no author  in "+n);}
3170
                    myname.setAuthor(a);
3171
                    //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3172
                    n=n2;
3173

    
3174
                }
3175

    
3176
                myname.setSpecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank));
3177
                myname.setAuthor(author);
3178
            }
3179
            if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3180
                myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3181
            }
3182
            if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3183
                myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3184
            }
3185
            if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
3186
                myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3187
            }
3188
            if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
3189
                myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3190
            }
3191
        }
3192
    }
3193

    
3194
    /**
3195
     * @return
3196
     */
3197
    private boolean checkRankValidForImport(Rank currentRank) {
3198
        //logger.info("checkRankValidForImport");
3199
        return currentRank.isLower(state2.getConfig().getMaxRank()) || currentRank.equals(state2.getConfig().getMaxRank());
3200
    }
3201

    
3202

    
3203

    
3204
    /**
3205
     * @param classification2
3206
     */
3207
    public void updateClassification(Classification classification2) {
3208
        //logger.info("updateClassification");
3209
        classification = classification2;
3210
    }
3211

    
3212
    /**
3213
     * @param tnb
3214
     * cast the current taxonnamebase into a botanical name or zoological or bacterial name
3215
     * if errors, cast into a classis nonviralname
3216
     * @param taxonnamebase2
3217
     */
3218
    @SuppressWarnings("rawtypes")
3219
    public NonViralName<?> castTaxonNameBase(TaxonNameBase tnb, NonViralName<?> nvn) {
3220

    
3221
    	//logger.info("castTaxonNameBase");
3222
        NonViralName<?> taxonnamebase2 = nvn;
3223
        if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
3224
            try{
3225
                taxonnamebase2=(BotanicalName) tnb;
3226
            }catch(Exception e){
3227
                taxonnamebase2= (NonViralName<?>) tnb;
3228
            }
3229
        }
3230
        if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
3231
            try{
3232
                taxonnamebase2=(ZoologicalName) tnb;
3233
            }catch(Exception e){
3234
                taxonnamebase2= (NonViralName<?>) tnb;
3235
            }
3236
        }
3237
        if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
3238
            try{
3239
                taxonnamebase2=(BacterialName) tnb;
3240
            }catch(Exception e){
3241
                taxonnamebase2= (NonViralName<?>) tnb;
3242
            }
3243
        }
3244
        return taxonnamebase2;
3245
    }
3246

    
3247
    /**
3248
     * @param tnb
3249
     * cast the current taxonnamebase into a botanical name or zoological or bacterial name
3250
     * if errors, cast into a classis nonviralname
3251
     * @param taxonnamebase2
3252
     */
3253
    @SuppressWarnings("rawtypes")
3254
    public NonViralName<?> castTaxonNameBase(TaxonNameBase tnb) {
3255
        //logger.info("castTaxonNameBase2");
3256
        NonViralName<?> taxonnamebase2 = null;
3257
        tnb=CdmBase.deproxy(tnb, TaxonNameBase.class);
3258
        if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
3259
            try{
3260
                taxonnamebase2=(BotanicalName) tnb;
3261
            }catch(Exception e){
3262
                taxonnamebase2= (NonViralName<?>) tnb;
3263
            }
3264
        }
3265
        if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
3266
            try{
3267
                taxonnamebase2=(ZoologicalName) tnb;
3268
            }catch(Exception e){
3269
                taxonnamebase2= (NonViralName<?>) tnb;
3270
            }
3271
        }
3272
        if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
3273
            try{
3274
                taxonnamebase2=(BacterialName) tnb;
3275
            }catch(Exception e){
3276
                taxonnamebase2= (NonViralName<?>) tnb;
3277
            }
3278
        }
3279
        return taxonnamebase2;
3280
    }
3281

    
3282
    public class MyName {
3283
        /**
         * Creates an empty name holder.
         *
         * @param isSynonym {@code true} if the name is to be handled as a synonym
         */
        public MyName(boolean isSynonym) {
            // the implicit superclass constructor call is sufficient here
            this.isSynonym = isSynonym;
        }
3290

    
3291
        String originalName="";
3292
        String newName="";
3293
        Rank rank=Rank.UNKNOWN_RANK();
3294
        String identifier="";
3295
        String status="";
3296
        String author=null;
3297

    
3298
        NonViralName<?> taxonNameBase;
3299

    
3300
        Reference refMods ;
3301

    
3302
        Taxon family,subfamily,tribe,subtribe,genus,subgenus,species,subspecies, variety,form;
3303
        NonViralName<?> familyName, subfamilyName, tribeName,subtribeName,genusName,subgenusName,speciesName,subspeciesName;
3304
        String familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr;
3305
        Integer publicationYear;
3306

    
3307

    
3308
		Taxon higherTaxa;
3309
        Rank higherRank;
3310
        private Taxon taxon;
3311
        private Synonym syno;
3312

    
3313
        /**
3314
         * @return the syno
3315
         */
3316
        public Synonym getSyno() {
3317
            return syno;
3318
        }
3319

    
3320
        @Override
3321
        public String toString(){
3322
            List<String> tot=new ArrayList<String>();
3323
            String[] n= {familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr};
3324
            for (String elt:n){
3325
                if (!StringUtils.isEmpty(elt)) {
3326
                    tot.add(elt);
3327
                } else {
3328
                    tot.add("*");
3329
                }
3330
            }
3331
            return StringUtils.join(tot," ");
3332
        }
3333
        /**
3334
         * @param syno the syno to set
3335
         */
3336
        public void setSyno(Synonym syno) {
3337
            this.syno = syno;
3338
        }
3339

    
3340
        // synonym flag; assigned by the constructor and by setSynonym(boolean)
        boolean isSynonym = false;
3341

    
3342
        /**
3343
         * @return the isSynonym
3344
         */
3345
        public boolean isSynonym() {
3346
            return isSynonym;
3347
        }
3348

    
3349
        /**
3350
         * @param isSynonym the isSynonym to set
3351
         */
3352
        public void setSynonym(boolean isSynonym) {
3353
            this.isSynonym = isSynonym;
3354
        }
3355

    
3356
        public void setSource(Reference re){
3357
            refMods=re;
3358
        }
3359

    
3360
        /**
3361
         * @param string
3362
         */
3363
        public void setFormStr(String string) {
3364
            this.formStr=string;
3365

    
3366
        }
3367
        /**
3368
         * @param string
3369
         */
3370
        public void setVarietyStr(String string) {
3371
            this.varietyStr=string;
3372

    
3373
        }
3374
        /**
3375
         * @param string
3376
         */
3377
        public void setSubspeciesStr(String string) {
3378
            this.subspeciesStr=string;
3379

    
3380
        }
3381
        /**
3382
         * @param string
3383
         */
3384
        public void setSpeciesStr(String string) {
3385
            this.speciesStr=string;
3386

    
3387
        }
3388
        /**
3389
         * @param string
3390
         */
3391
        public void setSubgenusStr(String string) {
3392
            this.subgenusStr=string;
3393

    
3394
        }
3395
        /**
3396
         * @param string
3397
         */
3398
        public void setGenusStr(String string) {
3399
            this.genusStr=string;
3400

    
3401
        }
3402
        /**
3403
         * @param string
3404
         */
3405
        public void setSubtribeStr(String string) {
3406
            this.subtribeStr=string;
3407

    
3408
        }
3409
        /**
3410
         * @param string
3411
         */
3412
        public void setTribeStr(String string) {
3413
            this.tribeStr=string;
3414

    
3415
        }
3416
        /**
3417
         * @param string
3418
         */
3419
        public void setSubfamilyStr(String string) {
3420
            this.subfamilyStr=string;
3421

    
3422
        }
3423
        /**
3424
         * @param string
3425
         */
3426
        public void setFamilyStr(String string) {
3427
            this.familyStr=string;
3428

    
3429
        }
3430
        /**
3431
         * @return the familyStr
3432
         */
3433
        public String getFamilyStr() {
3434
            return familyStr;
3435
        }
3436
        /**
3437
         * @return the subfamilyStr
3438
         */
3439
        public String getSubfamilyStr() {
3440
            return subfamilyStr;
3441
        }
3442
        /**
3443
         * @return the tribeStr
3444
         */
3445
        public String getTribeStr() {
3446
            return tribeStr;
3447
        }
3448
        /**
3449
         * @return the subtribeStr
3450
         */
3451
        public String getSubtribeStr() {
3452
            return subtribeStr;
3453
        }
3454
        /**
3455
         * @return the genusStr
3456
         */
3457
        public String getGenusStr() {
3458
            return genusStr;
3459
        }
3460
        /**
3461
         * @return the subgenusStr
3462
         */
3463
        public String getSubgenusStr() {
3464
            return subgenusStr;
3465
        }
3466
        /**
3467
         * @return the speciesStr
3468
         */
3469
        public String getSpeciesStr() {
3470
            return speciesStr;
3471
        }
3472
        /**
3473
         * @return the subspeciesStr
3474
         */
3475
        public String getSubspeciesStr() {
3476
            return subspeciesStr;
3477
        }
3478
        /**
3479
         * @return the formStr
3480
         */
3481
        public String getFormStr() {
3482
            return formStr;
3483
        }
3484
        /**
3485
         * @return the varietyStr
3486
         */
3487
        public String getVarietyStr() {
3488
            return varietyStr;
3489
        }
3490

    
3491
        public Integer getPublicationYear() {
3492
			return publicationYear;
3493
		}
3494

    
3495
		public void setPublicationYear(Integer publicationYear) {
3496
			this.publicationYear = publicationYear;
3497
		}
3498

    
3499
        /**
3500
         * @param newName2
3501
         */
3502
        public void setNotParsableTaxon(String newName2) {
3503
            //takes too much time
3504
            //            List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3505

    
3506
            NomenclaturalStatusType statusType = null;
3507
            if (!getStatus().isEmpty()){
3508
                try {
3509
                    statusType = nomStatusString2NomStatus(getStatus());
3510
                } catch (UnknownCdmTypeException e) {
3511
                    addProblematicStatusToFile(getStatus());
3512
                    logger.warn("Problem with status");
3513
                }
3514
            }
3515
            List<TaxonBase> tmpList = new ArrayList<>();
3516

    
3517
            Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, newName2, MatchMode.BEGINNING, null, null, null, null, null);
3518
            tmpList.addAll(taxontest.getRecords());
3519

    
3520
            //logger.info("tmpList returned: "+tmpList.size());
3521

    
3522

    
3523
            INonViralName identicName = null;
3524
            boolean foundIdentic=false;
3525
            TaxonBase<?> tmpTaxonBase=null;
3526
            //            Taxon tmpPartial=null;
3527
            for (TaxonBase<?> tmpb:tmpList){
3528
                if(tmpb !=null){
3529
                    TaxonNameBase<?,?> tnb =  tmpb.getName();
3530
                    Rank crank=null;
3531
                    if (tnb != null){
3532
                        if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(newName2) ){
3533
                            crank =tnb.getRank();
3534
                            if (crank !=null && rank !=null){
3535
                                if (crank.equals(rank)){
3536
                                	identicName = tnb;
3537
                                	if (isSynonym && tmpb.isInstanceOf(Synonym.class) || !isSynonym && tmpb.isInstanceOf(Taxon.class)){
3538
                                		foundIdentic=true;
3539
                                		tmpTaxonBase=tmpb;
3540
                               			break;
3541
                                	}
3542
                                }
3543
                            }
3544
                        }
3545
                    }
3546
                }
3547
            }
3548
            boolean statusMatch=false;
3549
            boolean appendedMatch=false;
3550
            if(tmpTaxonBase !=null && foundIdentic){
3551
                statusMatch=compareStatus(tmpTaxonBase, statusType);
3552
                if (!getStatus().isEmpty() && ! (tmpTaxonBase.getAppendedPhrase() == null)) {
3553
                    appendedMatch=tmpTaxonBase.getAppendedPhrase().equals(getStatus());
3554
                }
3555
                if (getStatus().isEmpty() && tmpTaxonBase.getAppendedPhrase() == null) {
3556
                    appendedMatch=true;
3557
                }
3558

    
3559
            }
3560
            if ((tmpTaxonBase == null || !foundIdentic) ||  (tmpTaxonBase != null && !statusMatch) ||  (tmpTaxonBase != null && !appendedMatch && !statusMatch)){
3561

    
3562
            	INonViralName tnb;
3563
            	if (identicName == null){
3564
            		tnb = getNonViralNameAccNomenclature();
3565
            		tnb.setRank(rank);
3566

    
3567
	                if(statusType != null) {
3568
	                    tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3569
	                }
3570
	                if(StringUtils.isNotBlank(getStatus())) {
3571
	                    tnb.setAppendedPhrase(getStatus());
3572
	                }
3573
	                tnb.setTitleCache(newName2,true);
3574
	                tmpTaxonBase = findMatchingTaxon(tnb,refMods);
3575
	            }else{
3576
            		tnb = identicName;
3577
            	}
3578

    
3579
                if(tmpTaxonBase==null){
3580
                    tmpTaxonBase = isSynonym ? Synonym.NewInstance(tnb, refMods) : Taxon.NewInstance(tnb, refMods);
3581
                    if(!state2.getConfig().doKeepOriginalSecundum()) {
3582
                        tmpTaxonBase.setSec(state2.getConfig().getSecundum());
3583
                    }
3584
                    //tmptaxonbase.setSec(refMods);
3585
                    if(!isSynonym) {
3586
                        classification.addChildTaxon((Taxon)tmpTaxonBase, null, null);
3587
                        sourceHandler.addSource(refMods, (Taxon)tmpTaxonBase);
3588
                    }
3589
                }
3590
            }
3591

    
3592
            tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3593
            if (author != null) {
3594
                if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
3595
                    setLSID(getIdentifier(), tmpTaxonBase);
3596
                    importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3597
                    tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3598
                }
3599
            }
3600
            TaxonNameBase<?,?> tnb = CdmBase.deproxy(tmpTaxonBase.getName(), TaxonNameBase.class);
3601

    
3602
            if(!isSynonym) {
3603
                this.taxon=(Taxon)tmpTaxonBase;
3604
            } else {
3605
                if (tmpTaxonBase instanceof Taxon){
3606
                	logger.warn("Incorrect status");
3607
                }
3608
            	this.syno=(Synonym)tmpTaxonBase;
3609
            }
3610

    
3611
            taxonNameBase = castTaxonNameBase(tnb, taxonNameBase);
3612

    
3613
        }
3614

    
3615
        /**
3616
         *
3617
         */
3618
        public void buildTaxon() {
3619
            //System.out.println("BUILD TAXON");
3620
            logger.info("buildTaxon");
3621
            NomenclaturalStatusType statusType = null;
3622
            if (!getStatus().isEmpty()){
3623
            	status = getStatus();
3624
            	String newNameStatus = newNameStatus(status);
3625
            	if (newNameStatus != null){
3626
            		taxonNameBase.setAppendedPhrase(newNameStatus);
3627
            	}else{
3628
            		try {
3629
            			statusType = nomStatusString2NomStatus(getStatus());
3630
            			taxonNameBase.addStatus(NomenclaturalStatus.NewInstance(statusType));
3631
            		} catch (UnknownCdmTypeException e) {
3632
            			addProblematicStatusToFile(getStatus());
3633
            			logger.warn("Problem with status");
3634
            		}
3635
            	}
3636
            }
3637
            importer.getNameService().save(taxonNameBase);
3638

    
3639
            TaxonBase<?> tmpTaxonBase;
3640
            if (!isSynonym) {
3641
                tmpTaxonBase =Taxon.NewInstance(taxonNameBase, refMods); //sec set null
3642
            }
3643
            else {
3644
                tmpTaxonBase =Synonym.NewInstance(taxonNameBase, refMods); //sec set null
3645
            }
3646
            boolean exist = false;
3647
            if (!isSynonym){
3648
	            for (TaxonNode node : classification.getAllNodes()){
3649
	                try{
3650
	                	Taxon nodeTaxon = node.getTaxon();
3651
	                	boolean titleMatches = nodeTaxon.getTitleCache().equalsIgnoreCase(tmpTaxonBase.getTitleCache());
3652
	                	boolean nomStatusMatches = compareStatus(node.getTaxon(), statusType);
3653
	                	boolean nodeNameReplaceable = checkNodeNameReplaceable(nodeTaxon, tmpTaxonBase);
3654
	                    if(titleMatches && nomStatusMatches) {
3655
	                    	if (!isSynonym) {
3656
	                    		tmpTaxonBase=CdmBase.deproxy(nodeTaxon, TaxonBase.class);
3657
	                            exist =true;
3658
	                        } else {
3659
	                            logger.info("Found the same name but from another type (taxon/synonym)");
3660
	                            TaxonNameBase<?,?> existingTnb = getTaxon().getName();
3661
                                tmpTaxonBase = Synonym.NewInstance(existingTnb, refMods);
3662
                                importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3663
                                exist =true;
3664
                            }
3665
	                    }else if (nodeNameReplaceable){
3666
	                    	nodeTaxon.setName(tmpTaxonBase.getName());
3667
	                    	tmpTaxonBase = nodeTaxon;
3668
	                    	exist = true;
3669
	                    }
3670
	                }catch(NullPointerException n){logger.warn(" A taxon is either null or its titlecache is null - ignore it?");}
3671
	            }
3672
            }
3673
            if (!exist){
3674

    
3675
                boolean insertAsExisting =false;
3676
                List<Taxon> existingTaxons=new ArrayList<Taxon>();
3677
                try {
3678
                    existingTaxons = getMatchingTaxa(taxonNameBase);
3679
                } catch (Exception e1) {
3680
                    e1.printStackTrace();
3681
                }
3682
                double similarityScore=0.0;
3683
                double similarityAuthor=-1;
3684
                String author1="";
3685
                String author2="";
3686
                String t1="";
3687
                String t2="";
3688
                for (Taxon bestMatchingTaxon : existingTaxons){
3689
                    //System.out.println("tnbase "+taxonnamebase.getTitleCache());
3690
                    //System.out.println("bestex "+bestMatchingTaxon.getTitleCache());
3691
                    if(taxonNameBase.getAuthorshipCache()!=null) {
3692
                    	author1=taxonNameBase.getAuthorshipCache();
3693
                    }
3694
                    try {
3695
                        if(castTaxonNameBase(bestMatchingTaxon.getName()).getAuthorshipCache()!=null) {
3696
                            author2=castTaxonNameBase(bestMatchingTaxon.getName()).getAuthorshipCache();
3697
                        }
3698
                    } catch (Exception e) {
3699
                        // TODO Auto-generated catch block
3700
                        e.printStackTrace();
3701
                    }
3702
                    try {
3703
                        t1=taxonNameBase.getTitleCache();
3704
                        if (author1!=null && !StringUtils.isEmpty(author1)) {
3705
                            t1=t1.split(Pattern.quote(author1))[0];
3706
                        }
3707
                    } catch (Exception e) {
3708
                        // TODO Auto-generated catch block
3709
                        e.printStackTrace();
3710
                    }
3711
                    try {
3712
                        t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
3713
                        if (author2!=null && !StringUtils.isEmpty(author2)) {
3714
                            t2=t2.split(Pattern.quote(author2))[0];
3715
                        }
3716
                    } catch (Exception e) {
3717
                        // TODO Auto-generated catch block
3718
                        e.printStackTrace();
3719
                    }
3720

    
3721
                    similarityScore=similarity(t1.trim(), t2.trim());
3722
                    //System.out.println("taxonscore "+similarityScore);
3723
                    similarityAuthor=similarity(author1.trim(), author2.trim());
3724
                    //System.out.println("authorscore "+similarityAuthor);
3725
                    insertAsExisting = compareAndCheckTaxon(taxonNameBase, refMods, similarityScore, bestMatchingTaxon, similarityAuthor);
3726
                    if(insertAsExisting) {
3727
                        tmpTaxonBase=bestMatchingTaxon;
3728
                        break;
3729
                    }
3730
                }
3731
                if ( !insertAsExisting ){
3732
                    if(!state2.getConfig().doKeepOriginalSecundum()) {
3733
                        tmpTaxonBase.setSec(state2.getConfig().getSecundum());
3734
                    }
3735

    
3736
                    //                    tmptaxonbase.setSec(refMods);
3737
                    if (taxonNameBase.getRank().equals(state2.getConfig().getMaxRank())) {
3738
                        //System.out.println("****************************"+tmptaxonbase);
3739
                        if (!isSynonym) {
3740
                            classification.addChildTaxon((Taxon)tmpTaxonBase, refMods, null);
3741
                        }
3742
                    } else{
3743
                        hierarchy = new HashMap<Rank, Taxon>();
3744
                        //System.out.println("LOOK FOR PARENT "+taxonnamebase.toString()+", "+tmptaxonbase.toString());
3745
                        if (!isSynonym){
3746
                            lookForParentNode(taxonNameBase,(Taxon)tmpTaxonBase, refMods,this);
3747
                            //System.out.println("HIERARCHY "+hierarchy);
3748
                            Taxon parent = buildHierarchy();
3749
                            if(!taxonExistsInClassification(parent,(Taxon)tmpTaxonBase)){
3750
                                if(parent !=null) {
3751
                                    classification.addParentChild(parent, (Taxon)tmpTaxonBase, refMods, null);
3752
                                } else {
3753
                                    classification.addChildTaxon((Taxon)tmpTaxonBase, refMods, null);
3754
                                }
3755
                                importer.getClassificationService().saveOrUpdate(classification);
3756
                            }
3757
                        }
3758
                        //                        Set<TaxonNode> nodeList = classification.getAllNodes();
3759
                        //                        for(TaxonNode tn:nodeList) {
3760
                        //                            System.out.println(tn.getTaxon());
3761
                        //                        }
3762
                    }
3763
                }
3764
                importer.getClassificationService().saveOrUpdate(classification);
3765
                 if(isSynonym) {
3766
                    try{
3767
                        Synonym castTest=CdmBase.deproxy(tmpTaxonBase, Synonym.class);
3768
                    }catch(Exception e){
3769
                        TaxonNameBase<?,?> existingTnb = tmpTaxonBase.getName();
3770
                        Synonym castTest = Synonym.NewInstance(existingTnb, refMods);
3771
                        importer.getTaxonService().saveOrUpdate(castTest);
3772
                        tmpTaxonBase=CdmBase.deproxy(castTest, Synonym.class);
3773
                    }
3774
                }
3775
            }
3776
            if(!isSynonym) {
3777
                taxon=CdmBase.deproxy(tmpTaxonBase, Taxon.class);
3778
            } else {
3779
                syno=CdmBase.deproxy(tmpTaxonBase, Synonym.class);
3780
            }
3781

    
3782
        }
3783

    
3784
		private boolean checkNodeNameReplaceable(Taxon nodeTaxon, TaxonBase<?> newTaxon) {
3785
			//TODO preliminary check
3786
			if (newTaxon.isInstanceOf(Synonym.class)){
3787
				return false;
3788
			}
3789
			NonViralName<?> nodeName = CdmBase.deproxy(nodeTaxon.getName(), NonViralName.class);
3790
			NonViralName<?> newName = CdmBase.deproxy(newTaxon.getName(), NonViralName.class);
3791
			if (nodeTaxon.getName() == null ||  newName == null){
3792
				return false;
3793
			}
3794
			if (nodeTaxon.getDescriptions().size() > 0 || nodeName.getDescriptions().size() > 0 || nodeName.getTypeDesignations().size() > 0 ){
3795
				return false;
3796
			}
3797
			boolean compare = true;
3798
			for (NomenclaturalStatus status : newName.getStatus() ){
3799
				compare &= compareStatus(nodeTaxon, status.getType());
3800
			}
3801
			if (! compare){
3802
				return false;
3803
			}
3804

    
3805
			if (nodeName.getNameCache() != null && nodeName.getNameCache().equals(newName.getNameCache())){
3806
				if (nodeName.getNameCache().equals(nodeName.getTitleCache())){
3807
					if (newName.getNameCache().length() < newName.getTitleCache().length()){
3808
						logger.warn("We still need to check, if node was automatically created via hierarchy creation: " + nodeName.getNameCache());
3809
						return true;
3810
					}
3811
				}
3812
			}
3813

    
3814
			return false;
3815
		}
3816

    
3817
		/**
3818
         *
3819
         */
3820
        private Taxon buildHierarchy() {
3821
            logger.info("buildHierarchy");
3822
            Taxon higherTaxon = null;
3823
            //add the maxRank as a root
3824
            if(hierarchy.containsKey(state2.getConfig().getMaxRank())){
3825
                Taxon ct=hierarchy.get(state2.getConfig().getMaxRank());
3826
                if(!taxonExistsInClassification(higherTaxon, ct)) {
3827
                   classification.addChildTaxon(ct, refMods, null);
3828
                }
3829
                higherTaxon = hierarchy.get(state2.getConfig().getMaxRank());
3830
                //                return higherTaxon;
3831
            }
3832
            //add the relation to the highertaxon, except if the current rank to add IS the maxRank
3833

    
3834
            //TODO higher Ranks
3835

    
3836
            if(hierarchy.containsKey(Rank.FAMILY()) && !state2.getConfig().getMaxRank().equals(Rank.FAMILY())){
3837
                higherTaxon=saveAndGetHigherTaxon(Rank.FAMILY(),higherTaxon);
3838
            }
3839
            if(hierarchy.containsKey(Rank.SUBFAMILY()) && !state2.getConfig().getMaxRank().equals(Rank.SUBFAMILY())){
3840
                higherTaxon=saveAndGetHigherTaxon(Rank.SUBFAMILY(),higherTaxon);
3841
            }
3842
            if(hierarchy.containsKey(Rank.TRIBE())&& !state2.getConfig().getMaxRank().equals(Rank.TRIBE())){
3843
                higherTaxon=saveAndGetHigherTaxon(Rank.TRIBE(),higherTaxon);
3844
            }
3845
            if(hierarchy.containsKey(Rank.SUBTRIBE())&& !state2.getConfig().getMaxRank().equals(Rank.SUBTRIBE())){
3846
                higherTaxon=saveAndGetHigherTaxon(Rank.SUBTRIBE(),higherTaxon);
3847
            }
3848
            if(hierarchy.containsKey(Rank.GENUS())&& !state2.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3849
                higherTaxon=saveAndGetHigherTaxon(Rank.GENUS(),higherTaxon);
3850
            }
3851
            if(hierarchy.containsKey(Rank.SUBGENUS())&& !state2.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3852
                higherTaxon=saveAndGetHigherTaxon(Rank.SUBGENUS(),higherTaxon);
3853
            }
3854
            importer.getClassificationService().saveOrUpdate(classification);
3855
            return higherTaxon;
3856
        }
3857

    
3858
        private Taxon saveAndGetHigherTaxon(Rank r, Taxon higherTaxon){
3859
            Taxon ct=hierarchy.get(r);
3860
            if(!taxonExistsInClassification(higherTaxon,ct )) {
3861
                if(higherTaxon != null && ct!=null) {
3862
                    classification.addParentChild(higherTaxon, ct, refMods, null);
3863
                } else
3864
                    if(higherTaxon == null && ct !=null) {
3865
                        classification.addChildTaxon(ct, refMods, null);
3866
                }
3867
            }
3868
            return ct;
3869
        }
3870

    
3871
        private boolean taxonExistsInClassification(Taxon parent, Taxon child){
3872
            logger.info("taxonExistsInClassification");
3873
            //            System.out.println("LOOK IF TAXA EXIST "+parent+", "+child);
3874
            boolean found=false;
3875
            if(parent !=null){
3876
                for (TaxonNode p : classification.getAllNodes()){
3877
                    if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
3878
                        for (TaxonNode c : p.getChildNodes()) {
3879
                            if (c.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3880
                                found=true;
3881
                                break;
3882
                            }
3883
                        }
3884
                    }
3885
                }
3886
            }
3887
            else{
3888
                for (TaxonNode p : classification.getAllNodes()){
3889
                    if(p.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3890
                        found=true;
3891
                        break;
3892
                    }
3893
                }
3894
            }
3895
            //            System.out.println("LOOK IF TAXA EXIST? "+found);
3896
            return found;
3897
        }
3898
        /**
3899
         * @param nameToBeFilledTest
3900
         */
3901
        @SuppressWarnings("rawtypes")
3902
        public void setParsedName(ITaxonNameBase nameToBeFilledTest) {
3903
            this.taxonNameBase = (NonViralName<?>) nameToBeFilledTest;
3904

    
3905
        }
3906
        //variety dwcranks:varietyEpithet
3907
        /**
3908
         * @return the author
3909
         */
3910
        public String getAuthor() {
3911
            return author;
3912
        }
3913
        /**
3914
         * @return
3915
         */
3916
        public Taxon getTaxon() {
3917
            return taxon;
3918
        }
3919
        /**
3920
         * @return
3921
         */
3922
        public NonViralName<?> getTaxonNameBase() {
3923
            return taxonNameBase;
3924
        }
3925

    
3926
        /**
3927
         * @param findOrCreateTaxon
3928
         */
3929
        public void setForm(Taxon form) {
3930
            this.form=form;
3931

    
3932
        }
3933
        /**
3934
         * @param findOrCreateTaxon
3935
         */
3936
        public void setVariety(Taxon variety) {
3937
            this.variety=variety;
3938

    
3939
        }
3940
        /**
3941
         * @param string
3942
         * @return
3943
         */
3944
        @SuppressWarnings("rawtypes")
3945
        public Taxon findOrCreateTaxon(String partialname,String fullname, Rank rank, Rank globalrank) {
3946
            logger.info("findOrCreateTaxon");
3947
            sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
3948
            //takes too much time
3949
            //            List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3950
            //            logger.info("tmpList returned: "+tmpList.size());
3951

    
3952
            NomenclaturalStatusType statusType = null;
3953
            if (!getStatus().isEmpty()){
3954
                try {
3955
                    statusType = nomStatusString2NomStatus(getStatus());
3956
                } catch (UnknownCdmTypeException e) {
3957
                    addProblematicStatusToFile(getStatus());
3958
                    logger.warn("Problem with status");
3959
                }
3960
            }
3961

    
3962
            List<TaxonBase> tmpListFiltered = new ArrayList<TaxonBase>();
3963

    
3964
            Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, fullname, MatchMode.BEGINNING, null, null, null, null, null);
3965

    
3966
            tmpListFiltered.addAll(taxontest.getRecords());
3967
            taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, partialname, MatchMode.BEGINNING, null, null, null, null, null);
3968
            tmpListFiltered.addAll(taxontest.getRecords());
3969

    
3970
            //logger.info("tmpListFiltered returned: "+tmpListFiltered.size());
3971

    
3972
            boolean nameCorrected=false;
3973
            if (fullname.indexOf(partialname)<0) {
3974
                nameCorrected=true;
3975
            }
3976

    
3977
            boolean foundIdentic=false;
3978
            Taxon tmp=null;
3979
            for (TaxonBase tmpb:tmpListFiltered){
3980
                if(tmpb !=null){
3981
                    TaxonNameBase tnb =  tmpb.getName();
3982
                    Rank crank=null;
3983
                    if (tnb != null){
3984
                         if(globalrank.equals(rank) || (globalrank.isLower(Rank.SPECIES()) && rank.equals(Rank.SPECIES()))){
3985
                            if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(fullname) ){
3986
                                crank =tnb.getRank();
3987
                                if (crank !=null && rank !=null){
3988
                                    if (crank.equals(rank)){
3989
                                        foundIdentic=true;
3990
                                        try{
3991
                                            tmp=(Taxon)tmpb;
3992
                                            break;
3993
                                        }catch(Exception e){
3994
                                            e.printStackTrace();
3995
                                        }
3996
                                    }
3997
                                }
3998
                            }
3999
                            if(nameCorrected){ //for corrected names such as Anochetus -- A. blf-pat
4000
                                if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
4001
                                    crank =tnb.getRank();
4002
                                    if (crank !=null && rank !=null){
4003
                                        if (crank.equals(rank)){
4004
                                            foundIdentic=true;
4005
                                            try{
4006
                                                tmp=(Taxon)tmpb;
4007
                                                break;
4008
                                            }catch(Exception e){
4009
                                                e.printStackTrace();
4010
                                            }
4011
                                        }
4012
                                    }
4013
                                }
4014
                            }
4015
                        }
4016
                        else{
4017
                            if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
4018
                                crank =tnb.getRank();
4019
                                if (crank !=null && rank !=null){
4020
                                    if (crank.equals(rank)){
4021
                                        foundIdentic=true;
4022
                                        try{
4023
                                            tmp=(Taxon)tmpb;
4024
                                            break;
4025
                                        }catch(Exception e){
4026
                                            e.printStackTrace();
4027
                                        }
4028
                                    }
4029
                                }
4030
                            }
4031
                        }
4032
                    }
4033
                }
4034
            }
4035
            boolean statusMatch=false;
4036
            boolean appendedMatch=false;
4037
            if(tmp !=null && foundIdentic){
4038
                statusMatch=compareStatus(tmp, statusType);
4039
                if (!getStatus().isEmpty() && ! (tmp.getAppendedPhrase() == null)) {
4040
                    appendedMatch=tmp.getAppendedPhrase().equals(getStatus());
4041
                }
4042
                if (getStatus().isEmpty() && tmp.getAppendedPhrase() == null) {
4043
                    appendedMatch=true;
4044
                }
4045

    
4046
            }
4047
            if ((tmp == null || !foundIdentic) ||  (tmp != null && !statusMatch) ||  (tmp != null && !appendedMatch && !statusMatch)){
4048

    
4049
                NonViralName<?> tnb = getNonViralNameAccNomenclature();
4050
                tnb.setRank(rank);
4051

    
4052
                if(statusType != null) {
4053
                    tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
4054
                }
4055
                if(StringUtils.isNotBlank(getStatus())) {
4056
                    tnb.setAppendedPhrase(getStatus());
4057
                }
4058

    
4059
                if(rank.equals(Rank.UNKNOWN_RANK())){
4060
                    tnb.setTitleCache(fullname, true);
4061
                    //                    tnb.setGenusOrUninomial(fullname);
4062
                }
4063
                if(rank.isHigher(Rank.GENUS())) {
4064
                    tnb.setGenusOrUninomial(partialname);
4065
                }
4066

    
4067
                if(rank.isHigher(Rank.SPECIES())) {
4068
                    tnb.setTitleCache(partialname, true);
4069
                }
4070

    
4071
                if (rank.equals(globalrank) && author != null) {
4072

    
4073
                    tnb.setCombinationAuthorship(findOrCreateAuthor(author));
4074
                    if (getIdentifier() !=null && !getIdentifier().isEmpty()){
4075
                        Taxon taxonLSID = getTaxonByLSID(getIdentifier());
4076
                        if (taxonLSID !=null) {
4077
                            tmp=taxonLSID;
4078
                        }
4079
                    }
4080
                }
4081

    
4082
                if(tmp == null){
4083
                    if (rank.equals(Rank.FAMILY())) {
4084
                        tmp = buildFamily(tnb);
4085
                    }
4086
                    if (rank.equals(Rank.SUBFAMILY())) {
4087
                        tmp = buildSubfamily(tnb);
4088
                    }
4089
                    if (rank.equals(Rank.TRIBE())) {
4090
                        tmp = buildTribe(tnb);
4091
                    }
4092
                    if (rank.equals(Rank.SUBTRIBE())) {
4093
                        tmp = buildSubtribe(tnb);
4094
                    }
4095
                    if (rank.equals(Rank.GENUS())) {
4096
                        tmp = buildGenus(partialname, tnb);
4097
                    }
4098

    
4099
                    if (rank.equals(Rank.SUBGENUS())) {
4100
                        tmp = buildSubgenus(partialname, tnb);
4101
                    }
4102
                    if (rank.equals(Rank.SPECIES())) {
4103
                        tmp = buildSpecies(partialname, tnb);
4104
                    }
4105

    
4106
                    if (rank.equals(Rank.SUBSPECIES())) {
4107
                        tmp = buildSubspecies(partialname, tnb);
4108
                    }
4109

    
4110
                    if (rank.equals(Rank.VARIETY())) {
4111
                        tmp = buildVariety(fullname, partialname, tnb);
4112
                    }
4113

    
4114
                    if (rank.equals(Rank.FORM())) {
4115
                        tmp = buildForm(fullname, partialname, tnb);
4116
                    }
4117
                    if (tmp != null){
4118
                    	TaxonXTreatmentExtractor.this.sourceHandler.addSource(refMods, tmp);
4119
                    }
4120

    
4121
                    importer.getClassificationService().saveOrUpdate(classification);
4122
                }
4123

    
4124
            }
4125

    
4126
            tmp = CdmBase.deproxy(tmp, Taxon.class);
4127
            if (rank.equals(globalrank) && author != null) {
4128
                if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
4129
                    setLSID(getIdentifier(), tmp);
4130
                    importer.getTaxonService().saveOrUpdate(tmp);
4131
                    tmp = CdmBase.deproxy(tmp, Taxon.class);
4132
                }
4133
            }
4134

    
4135
            this.taxon=tmp;
4136

    
4137
            return tmp;
4138
        }
4139

    
4140
        /**
4141
         * @param tnb
4142
         * @return
4143
         */
4144
        private Taxon buildSubfamily(NonViralName<?> tnb) {
4145
            Taxon tmp;
4146
            //            tnb.generateTitle();
4147
            tmp = findMatchingTaxon(tnb,refMods);
4148
            if(tmp ==null){
4149
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4150
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4151
                    tmp.setSec(state2.getConfig().getSecundum());
4152
                }
4153
                //                tmp.setSec(refMods);
4154
                //                sourceHandler.addSource(refMods, tmp);
4155
                if(family != null) {
4156
                    classification.addParentChild(family, tmp, null, null);
4157
                    higherRank=Rank.FAMILY();
4158
                    higherTaxa=family;
4159
                } else {
4160
                    //System.out.println("ADDCHILDTAXON SUBFAMILY "+tmp);
4161
                    classification.addChildTaxon(tmp, null, null);
4162
                }
4163
            }
4164
            return tmp;
4165
        }
4166
        /**
4167
         * @param tnb
4168
         * @return
4169
         */
4170
        private Taxon buildFamily(NonViralName<?> tnb) {
4171
            Taxon tmp;
4172
            //            tnb.generateTitle();
4173
            tmp = findMatchingTaxon(tnb,refMods);
4174
            if(tmp ==null){
4175
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4176
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4177
                    tmp.setSec(state2.getConfig().getSecundum());
4178
                }
4179
                //                tmp.setSec(refMods);
4180
                //sourceHandler.addSource(refMods, tmp);
4181
                //System.out.println("ADDCHILDTAXON FAMILY "+tmp);
4182
                classification.addChildTaxon(tmp, null, null);
4183
            }
4184
            return tmp;
4185
        }
4186
        /**
4187
         * @param fullname
4188
         * @param tnb
4189
         * @return
4190
         */
4191
        private Taxon buildForm(String fullname, String partialname, NonViralName<?> tnb) {
4192
            if (genusName !=null) {
4193
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4194
            }
4195
            if (subgenusName !=null) {
4196
                tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4197
            }
4198
            if(speciesName !=null) {
4199
                tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4200
            }
4201
            if(subspeciesName != null) {
4202
                tnb.setInfraSpecificEpithet(subspeciesName.getInfraSpecificEpithet());
4203
            }
4204
            if(partialname!= null) {
4205
                tnb.setInfraSpecificEpithet(partialname);
4206
            }
4207
             //TODO how to save form??
4208
            tnb.setTitleCache(fullname, true);
4209
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4210
            if(tmp ==null){
4211
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4212
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4213
                    tmp.setSec(state2.getConfig().getSecundum());
4214
                }
4215
                //                tmp.setSec(refMods);
4216
                //sourceHandler.addSource(refMods, tmp);
4217
                if (subspecies !=null) {
4218
                    classification.addParentChild(subspecies, tmp, null, null);
4219
                    higherRank=Rank.SUBSPECIES();
4220
                    higherTaxa=subspecies;
4221
                } else {
4222
                    if (species !=null) {
4223
                        classification.addParentChild(species, tmp, null, null);
4224
                        higherRank=Rank.SPECIES();
4225
                        higherTaxa=species;
4226
                    }
4227
                    else{
4228
                        //                        System.out.println("ADDCHILDTAXON FORM "+tmp);
4229
                        classification.addChildTaxon(tmp, null, null);
4230
                    }
4231
                }
4232
            }
4233
            return tmp;
4234
        }
4235
        /**
4236
         * @param fullname
4237
         * @param tnb
4238
         * @return
4239
         */
4240
        private Taxon buildVariety(String fullname, String partialname, NonViralName<?> tnb) {
4241
            Taxon tmp;
4242
            if (genusName !=null) {
4243
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4244
            }
4245
            if (subgenusName !=null) {
4246
                tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4247
            }
4248
            if(speciesName !=null) {
4249
                tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4250
            }
4251
            if(subspeciesName != null) {
4252
                tnb.setInfraSpecificEpithet(subspeciesName.getSpecificEpithet());
4253
            }
4254
            if(partialname != null) {
4255
                tnb.setInfraSpecificEpithet(partialname);
4256
            }
4257
            //TODO how to save variety?
4258
            tnb.setTitleCache(fullname, true);
4259
            tmp = findMatchingTaxon(tnb,refMods);
4260
            if(tmp ==null){
4261
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4262
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4263
                    tmp.setSec(state2.getConfig().getSecundum());
4264
                }
4265
                //                tmp.setSec(refMods);
4266
                //sourceHandler.addSource(refMods, tmp);
4267
                if (subspecies !=null) {
4268
                    classification.addParentChild(subspecies, tmp, null, null);
4269
                    higherRank=Rank.SUBSPECIES();
4270
                    higherTaxa=subspecies;
4271
                } else {
4272
                    if(species !=null) {
4273
                        classification.addParentChild(species, tmp, null, null);
4274
                        higherRank=Rank.SPECIES();
4275
                        higherTaxa=species;
4276
                    }
4277
                    else{
4278
                        //System.out.println("ADDCHILDTAXON VARIETY "+tmp);
4279
                        classification.addChildTaxon(tmp, null, null);
4280
                    }
4281
                }
4282
            }
4283
            return tmp;
4284
        }
4285
        /**
4286
         * @param partialname
4287
         * @param tnb
4288
         * @return
4289
         */
4290
        private Taxon buildSubspecies(String partialname, NonViralName<?> tnb) {
4291
            if (genusName !=null) {
4292
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4293
            }
4294
            if (subgenusName !=null) {
4295
                //                            System.out.println("SUB:"+subgenusName.getInfraGenericEpithet());
4296
                tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4297
            }
4298
            if(speciesName !=null) {
4299
                //                            System.out.println("SPE:"+speciesName.getSpecificEpithet());
4300
                tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4301
            }
4302
            tnb.setInfraSpecificEpithet(partialname);
4303
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4304
            if(tmp ==null){
4305
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4306
                if(!state2.getConfig().doKeepOriginalSecundum())
4307
                 {
4308
                    tmp.setSec(state2.getConfig().getSecundum());
4309
                //                tmp.setSec(refMods);
4310
                //sourceHandler.addSource(refMods, tmp);
4311
                }
4312

    
4313
                if(species != null) {
4314
                    classification.addParentChild(species, tmp, null, null);
4315
                    higherRank=Rank.SPECIES();
4316
                    higherTaxa=species;
4317
                }
4318
                else{
4319
                    //System.out.println("ADDCHILDTAXON SUBSPECIES "+tmp);
4320
                    classification.addChildTaxon(tmp, null, null);
4321
                }
4322
            }
4323
            return tmp;
4324
        }
4325
        /**
4326
         * @param partialname
4327
         * @param tnb
4328
         * @return
4329
         */
4330
        private Taxon buildSpecies(String partialname, NonViralName<?> tnb) {
4331
            if (genusName !=null) {
4332
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4333
            }
4334
            if (subgenusName !=null) {
4335
                tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4336
            }
4337
            tnb.setSpecificEpithet(partialname.toLowerCase());
4338
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4339
            if(tmp ==null){
4340
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4341
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4342
                    tmp.setSec(state2.getConfig().getSecundum());
4343
                }
4344
                //                tmp.setSec(refMods);
4345
                //sourceHandler.addSource(refMods, tmp);
4346
                if (subgenus !=null) {
4347
                    classification.addParentChild(subgenus, tmp, null, null);
4348
                    higherRank=Rank.SUBGENUS();
4349
                    higherTaxa=subgenus;
4350
                } else {
4351
                    if (genus !=null) {
4352
                        classification.addParentChild(genus, tmp, null, null);
4353
                        higherRank=Rank.GENUS();
4354
                        higherTaxa=genus;
4355
                    }
4356
                    else{
4357
                        //System.out.println("ADDCHILDTAXON SPECIES "+tmp);
4358
                        classification.addChildTaxon(tmp, null, null);
4359
                    }
4360
                }
4361
            }
4362
            return tmp;
4363
        }
4364
        /**
4365
         * @param partialname
4366
         * @param tnb
4367
         * @return
4368
         */
4369
        private Taxon buildSubgenus(String partialname, NonViralName<?> tnb) {
4370
            tnb.setInfraGenericEpithet(partialname);
4371
            if (genusName !=null) {
4372
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4373
            }
4374
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4375
            if(tmp ==null){
4376
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4377
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4378
                    tmp.setSec(state2.getConfig().getSecundum());
4379
                }
4380
                //                tmp.setSec(refMods);
4381
                //sourceHandler.addSource(refMods, tmp);
4382
                if(genus != null) {
4383
                    classification.addParentChild(genus, tmp, null, null);
4384
                    higherRank=Rank.GENUS();
4385
                    higherTaxa=genus;
4386
                } else{
4387
                    //System.out.println("ADDCHILDTAXON SUBGENUS "+tmp);
4388
                    classification.addChildTaxon(tmp, null, null);
4389
                }
4390
            }
4391
            return tmp;
4392
        }
4393
        /**
4394
         * @param partialname
4395
         * @param tnb
4396
         * @return
4397
         */
4398
        private Taxon buildGenus(String partialname, NonViralName<?> tnb) {
4399
            Taxon tmp;
4400
            tnb.setGenusOrUninomial(partialname);
4401

    
4402

    
4403
            tmp = findMatchingTaxon(tnb,refMods);
4404
            if(tmp ==null){
4405
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4406
                if(!state2.getConfig().doKeepOriginalSecundum())
4407
                 {
4408
                    tmp.setSec(state2.getConfig().getSecundum());
4409
                //                tmp.setSec(refMods);
4410
                //sourceHandler.addSource(refMods, tmp);
4411
                }
4412

    
4413
                if(subtribe != null) {
4414
                    classification.addParentChild(subtribe, tmp, null, null);
4415
                    higherRank=Rank.SUBTRIBE();
4416
                    higherTaxa=subtribe;
4417
                } else{
4418
                    if(tribe !=null) {
4419
                        classification.addParentChild(tribe, tmp, null, null);
4420
                        higherRank=Rank.TRIBE();
4421
                        higherTaxa=tribe;
4422
                    } else{
4423
                        if(subfamily !=null) {
4424
                            classification.addParentChild(subfamily, tmp, null, null);
4425
                            higherRank=Rank.SUBFAMILY();
4426
                            higherTaxa=subfamily;
4427
                        } else
4428
                            if(family !=null) {
4429
                                classification.addParentChild(family, tmp, null, null);
4430
                                higherRank=Rank.FAMILY();
4431
                                higherTaxa=family;
4432
                            }
4433
                            else{
4434
                                //System.out.println("ADDCHILDTAXON GENUS "+tmp);
4435
                                classification.addChildTaxon(tmp, null, null);
4436
                            }
4437
                    }
4438
                }
4439
            }
4440
            return tmp;
4441
        }
4442

    
4443
        /**
4444
         * @param tnb
4445
         * @return
4446
         */
4447
        private Taxon buildSubtribe(NonViralName<?> tnb) {
4448
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4449
            if(tmp==null){
4450
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4451
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4452
                    tmp.setSec(state2.getConfig().getSecundum());
4453
                }
4454
                //                tmp.setSec(refMods);
4455
                //sourceHandler.addSource(refMods, tmp);
4456
                if(tribe != null) {
4457
                    classification.addParentChild(tribe, tmp, null, null);
4458
                    higherRank=Rank.TRIBE();
4459
                    higherTaxa=tribe;
4460
                } else{
4461
                    //System.out.println("ADDCHILDTAXON SUBTRIBE "+tmp);
4462
                    classification.addChildTaxon(tmp, null, null);
4463
                }
4464
            }
4465
            return tmp;
4466
        }
4467
        /**
4468
         * @param tnb
4469
         * @return
4470
         */
4471
        private Taxon buildTribe(NonViralName<?> tnb) {
4472
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4473
            if(tmp==null){
4474
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4475
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4476
                    tmp.setSec(state2.getConfig().getSecundum());
4477
                }
4478
                //                tmp.setSec(refMods);
4479
                //sourceHandler.addSource(refMods, tmp);
4480
                if (subfamily !=null) {
4481
                    classification.addParentChild(subfamily, tmp, null, null);
4482
                    higherRank=Rank.SUBFAMILY();
4483
                    higherTaxa=subfamily;
4484
                } else {
4485
                    if(family != null) {
4486
                        classification.addParentChild(family, tmp, null, null);
4487
                        higherRank=Rank.FAMILY();
4488
                        higherTaxa=family;
4489
                    }
4490
                    else{
4491
                        //System.out.println("ADDCHILDTAXON TRIBE "+tmp);
4492
                        classification.addChildTaxon(tmp, null, null);
4493
                    }
4494
                }
4495
            }
4496
            return tmp;
4497
        }
4498

    
4499
        /**
4500
         * @param identifier2
4501
         * @return
4502
         */
4503
        @SuppressWarnings("rawtypes")
4504
        private Taxon getTaxonByLSID(String identifier) {
4505
            //logger.info("getTaxonByLSID");
4506
            //            boolean lsidok=false;
4507
            String id = identifier.split("__")[0];
4508
            //            String source = identifier.split("__")[1];
4509
            LSID lsid = null;
4510
            if (id.indexOf("lsid")>-1){
4511
                try {
4512
                    lsid = new LSID(id);
4513
                    //                    lsidok=true;
4514
                } catch (MalformedLSIDException e) {
4515
                    logger.warn("Malformed LSID");
4516
                }
4517
            }
4518
            if (lsid !=null){
4519
                List<Taxon> taxa = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
4520
                LSID currentlsid=null;
4521
                for (Taxon t:taxa){
4522
                    currentlsid = t.getLsid();
4523
                    if (currentlsid !=null){
4524
                        if (currentlsid.getLsid().equals(lsid.getLsid())){
4525
                            try{
4526
                                return t;
4527
                            }
4528
                            catch(Exception e){logger.warn("Exception occurred while comparing LSIDs "+e );}
4529
                        }
4530
                    }
4531
                }
4532
            }
4533
            return null;
4534
        }
4535
        /**
4536
         * @param author2
4537
         * @return
4538
         */
4539
        @SuppressWarnings("rawtypes")
4540
        private Person findOrCreateAuthor(String author2) {
4541
            //logger.info("findOrCreateAuthor");
4542
            List<UuidAndTitleCache<Person>> hiberPersons = importer.getAgentService().getPersonUuidAndTitleCache();
4543
            for (UuidAndTitleCache<Person> hibernateP:hiberPersons){
4544
                if(hibernateP.getTitleCache().equals(author2)) {
4545
                    AgentBase existing = importer.getAgentService().find(hibernateP.getUuid());
4546
                    return CdmBase.deproxy(existing, Person.class);
4547
                }
4548
            }
4549
            Person p = Person.NewInstance();
4550
            p.setTitleCache(author2,true);
4551
            importer.getAgentService().saveOrUpdate(p);
4552
            return CdmBase.deproxy(p, Person.class);
4553
        }
4554
        /**
4555
         * @param author the author to set
4556
         */
4557
        public void setAuthor(String author) {
4558
            this.author = author;
4559
        }
4560

    
4561
        /**
4562
         * @return the higherTaxa
4563
         */
4564
        public Taxon getHigherTaxa() {
4565
            return higherTaxa;
4566
        }
4567
        /**
4568
         * @param higherTaxa the higherTaxa to set
4569
         */
4570
        public void setHigherTaxa(Taxon higherTaxa) {
4571
            this.higherTaxa = higherTaxa;
4572
        }
4573
        /**
4574
         * @return the higherRank
4575
         */
4576
        public Rank getHigherRank() {
4577
            return higherRank;
4578
        }
4579
        /**
4580
         * @param higherRank the higherRank to set
4581
         */
4582
        public void setHigherRank(Rank higherRank) {
4583
            this.higherRank = higherRank;
4584
        }
4585
        public String getName(){
4586
            if (newName.isEmpty()) {
4587
                return originalName;
4588
            } else {
4589
                return newName;
4590
            }
4591

    
4592
        }
4593
        /**
4594
         * @return the fullName
4595
         */
4596
        public String getOriginalName() {
4597
            return originalName;
4598
        }
4599
        /**
4600
         * @param fullName the fullName to set
4601
         */
4602
        public void setOriginalName(String fullName) {
4603
            this.originalName = fullName;
4604
        }
4605
        /**
4606
         * @return the newName
4607
         */
4608
        public String getNewName() {
4609
            return newName;
4610
        }
4611
        /**
4612
         * @param newName the newName to set
4613
         */
4614
        public void setNewName(String newName) {
4615
            this.newName = newName;
4616
        }
4617
        /**
4618
         * @return the rank
4619
         */
4620
        public Rank getRank() {
4621
            return rank;
4622
        }
4623
        /**
4624
         * @param rank the rank to set
4625
         */
4626
        public void setRank(Rank rank) {
4627
            this.rank = rank;
4628
        }
4629
        /**
4630
         * @return the idenfitiger
4631
         */
4632
        public String getIdentifier() {
4633
            return identifier;
4634
        }
4635
        /**
4636
         * @param idenfitiger the idenfitiger to set
4637
         */
4638
        public void setIdentifier(String identifier) {
4639
            this.identifier = identifier;
4640
        }
4641
        /**
4642
         * @return the status
4643
         */
4644
        public String getStatus() {
4645
            if (status == null) {
4646
                return "";
4647
            }
4648
            return status;
4649
        }
4650
        /**
4651
         * @param status the status to set
4652
         */
4653
        public void setStatus(String status) {
4654
            this.status = status;
4655
        }
4656
        /**
4657
         * @return the family
4658
         */
4659
        public Taxon getFamily() {
4660
            return family;
4661
        }
4662
        /**
4663
         * @param family the family to set
4664
         */
4665
        @SuppressWarnings("rawtypes")
4666
        public void setFamily(Taxon family) {
4667
            this.family = family;
4668
            TaxonNameBase taxonNameBase = CdmBase.deproxy(family.getName(), TaxonNameBase.class);
4669
            familyName = castTaxonNameBase(taxonNameBase,familyName);
4670
        }
4671
        /**
4672
         * @return the subfamily
4673
         */
4674
        public Taxon getSubfamily() {
4675
            return subfamily;
4676
        }
4677
        /**
4678
         * @param subfamily the subfamily to set
4679
         */
4680
        @SuppressWarnings("rawtypes")
4681
        public void setSubfamily(Taxon subfamily) {
4682
            this.subfamily = subfamily;
4683
            TaxonNameBase taxonNameBase = CdmBase.deproxy(subfamily.getName(), TaxonNameBase.class);
4684
            subfamilyName = castTaxonNameBase(taxonNameBase,subfamilyName);
4685
        }
4686
        /**
4687
         * @return the tribe
4688
         */
4689
        public Taxon getTribe() {
4690
            return tribe;
4691
        }
4692
        /**
4693
         * @param tribe the tribe to set
4694
         */
4695
        @SuppressWarnings("rawtypes")
4696
        public void setTribe(Taxon tribe) {
4697
            this.tribe = tribe;
4698
            TaxonNameBase taxonNameBase = CdmBase.deproxy(tribe.getName(), TaxonNameBase.class);
4699
            tribeName = castTaxonNameBase(taxonNameBase,tribeName);
4700
        }
4701
        /**
4702
         * @return the subtribe
4703
         */
4704
        public Taxon getSubtribe() {
4705
            return subtribe;
4706
        }
4707
        /**
4708
         * @param subtribe the subtribe to set
4709
         */
4710
        @SuppressWarnings("rawtypes")
4711
        public void setSubtribe(Taxon subtribe) {
4712
            this.subtribe = subtribe;
4713
            TaxonNameBase taxonNameBase = CdmBase.deproxy(subtribe.getName(), TaxonNameBase.class);
4714
            subtribeName =castTaxonNameBase(taxonNameBase,subtribeName);
4715
        }
4716
        /**
4717
         * @return the genus
4718
         */
4719
        public Taxon getGenus() {
4720
            return genus;
4721
        }
4722
        /**
4723
         * @param genus the genus to set
4724
         */
4725
        @SuppressWarnings("rawtypes")
4726
        public void setGenus(Taxon genus) {
4727
            if (genus != null){
4728
	        	this.genus = genus;
4729
	            TaxonNameBase taxonNameBase = CdmBase.deproxy(genus.getName(), TaxonNameBase.class);
4730
	            genusName = castTaxonNameBase(taxonNameBase,genusName);
4731
            }
4732
        }
4733
        /**
4734
         * @return the subgenus
4735
         */
4736
        public Taxon getSubgenus() {
4737
            return subgenus;
4738
        }
4739
        /**
4740
         * @param subgenus the subgenus to set
4741
         */
4742
        @SuppressWarnings("rawtypes")
4743
        public void setSubgenus(Taxon subgenus) {
4744
            this.subgenus = subgenus;
4745
            TaxonNameBase taxonNameBase = CdmBase.deproxy(subgenus.getName(), TaxonNameBase.class);
4746
            subgenusName = castTaxonNameBase(taxonNameBase,subgenusName);
4747
        }
4748
        /**
4749
         * @return the species
4750
         */
4751
        public Taxon getSpecies() {
4752
            return species;
4753
        }
4754
        /**
4755
         * @param species the species to set
4756
         */
4757
        public void setSpecies(Taxon species) {
4758
        	if (species != null){
4759
	            this.species = species;
4760
	            @SuppressWarnings("rawtypes")
4761
	            TaxonNameBase taxonNameBase = CdmBase.deproxy(species.getName(), TaxonNameBase.class);
4762
	            speciesName = castTaxonNameBase(taxonNameBase,speciesName);
4763
        	}
4764
        }
4765
        /**
4766
         * @return the subspecies
4767
         */
4768
        public Taxon getSubspecies() {
4769
            return subspecies;
4770
        }
4771
        /**
4772
         * @param subspecies the subspecies to set
4773
         */
4774
        @SuppressWarnings("rawtypes")
4775
        public void setSubspecies(Taxon subspecies) {
4776
            this.subspecies = subspecies;
4777
            TaxonNameBase taxonNameBase = CdmBase.deproxy(subspecies.getName(), TaxonNameBase.class);
4778
            subspeciesName = castTaxonNameBase(taxonNameBase,subspeciesName);
4779

    
4780
        }
4781

    
4782

    
4783

    
4784
    }
4785

    
4786

    
4787
    /**
4788
     * @param status
4789
     */
4790
    private void addProblematicStatusToFile(String status) {
4791
        try{
4792
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "StatusUnknown_"+classification.getTitleCache()+".txt",true);
4793
            BufferedWriter out = new BufferedWriter(fstream);
4794
            out.write(status+"\n");
4795
            //Close the output stream
4796
            out.close();
4797
        }catch (Exception e){//Catch exception if any
4798
            System.err.println("Error: " + e.getMessage());
4799
        }
4800

    
4801
    }
4802

    
4803

    
4804

    
4805
    /**
4806
     * @param tnb
4807
     * @return
4808
     */
4809
    private Taxon findMatchingTaxon(INonViralName tnb, Reference refMods) {
4810
        logger.info("findMatchingTaxon");
4811
        Taxon tmp=null;
4812

    
4813
        refMods=CdmBase.deproxy(refMods, Reference.class);
4814
        boolean insertAsExisting =false;
4815
        List<Taxon> existingTaxa = new ArrayList<Taxon>();
4816
        try {
4817
            existingTaxa = getMatchingTaxa(tnb);
4818
        } catch (Exception e1) {
4819
            // TODO Auto-generated catch block
4820
            e1.printStackTrace();
4821
        }
4822
        double similarityScore=0.0;
4823
        double similarityAuthor=-1;
4824
        String author1="";
4825
        String author2="";
4826
        String t1="";
4827
        String t2="";
4828
        for (Taxon bestMatchingTaxon : existingTaxa){
4829
            if (!existingTaxa.isEmpty() && state2.getConfig().isInteractWithUser() && !insertAsExisting) {
4830
                //                System.out.println("tnb "+tnb.getTitleCache());
4831
                //                System.out.println("ext "+bestMatchingTaxon.getTitleCache());
4832
                try {
4833
                    if(tnb.getAuthorshipCache()!=null) {
4834
                        author1=tnb.getAuthorshipCache();
4835
                    }
4836
                } catch (Exception e) {
4837
                    // TODO Auto-generated catch block
4838
                    e.printStackTrace();
4839
                }
4840
                try {
4841
                    if(castTaxonNameBase(bestMatchingTaxon.getName()).getAuthorshipCache()!=null) {
4842
                        author2=castTaxonNameBase(bestMatchingTaxon.getName()).getAuthorshipCache();
4843
                    }
4844
                } catch (Exception e) {
4845
                    // TODO Auto-generated catch block
4846
                    e.printStackTrace();
4847
                }
4848
                try {
4849
                    t1=tnb.getTitleCache().split("sec.")[0].trim();
4850
                    if (author1!=null && !StringUtils.isEmpty(author1)) {
4851
                        t1=t1.split(Pattern.quote(author1))[0];
4852
                    }
4853
                } catch (Exception e) {
4854
                    // TODO Auto-generated catch block
4855
                    e.printStackTrace();
4856
                }
4857
                try {
4858
                    t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
4859
                    if (author2!=null && !StringUtils.isEmpty(author2)) {
4860
                        t2=t2.split(Pattern.quote(author2))[0];
4861
                    }
4862
                } catch (Exception e) {
4863
                    // TODO Auto-generated catch block
4864
                    e.printStackTrace();
4865
                }
4866
                similarityScore=similarity(t1.trim(), t2.trim());
4867
                //                System.out.println("taxascore: "+similarityScore);
4868
                similarityAuthor=similarity(author1.trim(), author2.trim());
4869
                //                System.out.println("authorscore: "+similarityAuthor);
4870
                insertAsExisting = compareAndCheckTaxon(tnb, refMods, similarityScore, bestMatchingTaxon,similarityAuthor);
4871
            }
4872
            if(insertAsExisting) {
4873
                //System.out.println("KEEP "+bestMatchingTaxon.toString());
4874
                tmp=bestMatchingTaxon;
4875
                sourceHandler.addSource(refMods, tmp);
4876
                return tmp;
4877
            }
4878
        }
4879
        return tmp;
4880
    }
4881

    
4882

    
4883
    /**
4884
     * @param tnb
4885
     * @param refMods
4886
     * @param similarityScore
4887
     * @param bestMatchingTaxon
4888
     * @param similarityAuthor
4889
     * @return
4890
     */
4891
    private boolean compareAndCheckTaxon(INonViralName tnb, Reference refMods, double similarityScore,
4892
            Taxon bestMatchingTaxon, double similarityAuthor) {
4893
        //logger.info("compareAndCheckTaxon");
4894
        boolean insertAsExisting;
4895
        //        if (tnb.getTitleCache().split("sec.")[0].equalsIgnoreCase("Chenopodium") && bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium album")>-1) {
4896
        //            insertAsExisting=false;
4897
        //        } else{
4898
        //a small hack/automatisation for Chenopodium only
4899
        if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase("Chenopodium") &&
4900
                bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium L.")>-1) {
4901
            insertAsExisting=true;
4902
        } else {
4903
            insertAsExisting=askIfReuseBestMatchingTaxon(tnb, bestMatchingTaxon, refMods, similarityScore,similarityAuthor);
4904
        }
4905
        //        }
4906

    
4907
        logDecision(tnb, bestMatchingTaxon, insertAsExisting, refMods);
4908
        return insertAsExisting;
4909
    }
4910

    
4911
    /**
4912
     * @return
4913
     */
4914
    @SuppressWarnings("rawtypes")
4915
    private List<Taxon> getMatchingTaxa(ITaxonNameBase tnb) {
4916
        //logger.info("getMatchingTaxon");
4917
    	if (tnb.getTitleCache() == null){
4918
    		tnb.setTitleCache(tnb.toString(), tnb.isProtectedTitleCache());
4919
    	}
4920

    
4921
        Pager<TaxonBase> pager=importer.getTaxonService().findByTitle(TaxonBase.class, tnb.getTitleCache().split("sec.")[0].trim(), MatchMode.BEGINNING, null, null, null, null, null);
4922
        List<TaxonBase>records = pager.getRecords();
4923

    
4924
        List<Taxon> existingTaxons = new ArrayList<Taxon>();
4925
        for (TaxonBase r:records){
4926
            try{
4927
                Taxon bestMatchingTaxon = (Taxon)r;
4928
                //                System.out.println("best: "+bestMatchingTaxon.getTitleCache());
4929
                if(compareTaxonNameLength(bestMatchingTaxon.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4930
                    existingTaxons.add(bestMatchingTaxon);
4931
                }
4932
            }catch(ClassCastException e){logger.warn("classcast exception, might be a synonym, ignore it");}
4933
        }
4934
        Taxon bmt = importer.getTaxonService().findBestMatchingTaxon(tnb.getTitleCache());
4935
        if (!existingTaxons.contains(bmt) && bmt!=null) {
4936
            if(compareTaxonNameLength(bmt.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4937
                existingTaxons.add(bmt);
4938
            }
4939
        }
4940
        return existingTaxons;
4941
    }
4942

    
4943
    /**
4944
     * Check if the found Taxon can reasonnably be the same
4945
     * example: with and without author should match, but the subspecies should not be suggested for a genus
4946
     * */
4947
    private boolean compareTaxonNameLength(String f, String o){
4948
        //logger.info("compareTaxonNameLength");
4949
        boolean lengthOk=false;
4950
        int sizeF = f.length();
4951
        int sizeO = o.length();
4952
        if (sizeO>=sizeF) {
4953
            lengthOk=true;
4954
        }
4955
        if(sizeF>sizeO) {
4956
            if (sizeF-sizeO>10) {
4957
                lengthOk=false;
4958
            } else {
4959
                lengthOk=true;
4960
            }
4961
        }
4962

    
4963
        //        System.out.println(lengthOk+": compare "+f+" ("+f.length()+") and "+o+" ("+o.length()+")");
4964
        return lengthOk;
4965
    }
4966

    
4967
    private double similarity(String s1, String s2) {
4968
        //logger.info("similarity");
4969
        //System.out.println("similarity *"+s1+"* vs. *"+s2+"*");
4970
        if(!StringUtils.isEmpty(s1) && !StringUtils.isEmpty(s2)){
4971
            String l1=s1.toLowerCase().trim();
4972
            String l2=s2.toLowerCase().trim();
4973
            if (l1.length() < l2.length()) { // s1 should always be bigger
4974
                String swap = l1; l1 = l2; l2 = swap;
4975
            }
4976
            int bigLen = l1.length();
4977
            if (bigLen == 0) { return 1.0; /* both strings are zero length */ }
4978
            return (bigLen - computeEditDistance(l1, l2)) / (double) bigLen;
4979
        }
4980
        else{
4981
            if(s1!=null && s2!=null){
4982
                if (s1.equalsIgnoreCase(s2)) {
4983
                    return 1;
4984
                }
4985
            }
4986
            return -1;
4987
        }
4988
    }
4989

    
4990
    private int computeEditDistance(String s1, String s2) {
4991
        //logger.info("computeEditDistance");
4992
        int[] costs = new int[s2.length() + 1];
4993
        for (int i = 0; i <= s1.length(); i++) {
4994
            int lastValue = i;
4995
            for (int j = 0; j <= s2.length(); j++) {
4996
                if (i == 0) {
4997
                    costs[j] = j;
4998
                } else {
4999
                    if (j > 0) {
5000
                        int newValue = costs[j - 1];
5001
                        if (s1.charAt(i - 1) != s2.charAt(j - 1)) {
5002
                            newValue = Math.min(Math.min(newValue, lastValue),
5003
                                    costs[j]) + 1;
5004
                        }
5005
                        costs[j - 1] = lastValue;
5006
                        lastValue = newValue;
5007
                    }
5008
                }
5009
            }
5010
            if (i > 0) {
5011
                costs[s2.length()] = lastValue;
5012
            }
5013
        }
5014
        return costs[s2.length()];
5015
    }
5016

    
5017
    /** Parent taxa found or created by the {@code handle...Hierarchy} methods, keyed by the rank of the parent. */
    Map<Rank, Taxon> hierarchy = new HashMap<Rank, Taxon>();
5018
    /**
5019
     * @param taxonNameBase
5020
     */
5021
    @SuppressWarnings("rawtypes")
5022
    public void lookForParentNode(NonViralName<?> taxonNameBase, Taxon tax, Reference ref, MyName myName) {
5023
        logger.info("lookForParentNode "+taxonNameBase.getTitleCache()+" for "+myName.toString());
5024
        //System.out.println("LOOK FOR PARENT NODE "+taxonnamebase.toString()+"; "+tax.toString()+"; "+taxonnamebase.getRank());
5025
        INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
5026
        if (taxonNameBase.getRank().equals(Rank.FORM())){
5027
            handleFormHierarchy(ref, myName, parser);
5028
        }
5029
        else if (taxonNameBase.getRank().equals(Rank.VARIETY())){
5030
            handleVarietyHierarchy(ref, myName, parser);
5031
        }
5032
        else if (taxonNameBase.getRank().equals(Rank.SUBSPECIES())){
5033
            handleSubSpeciesHierarchy(ref, myName, parser);
5034
        }
5035
        else if (taxonNameBase.getRank().equals(Rank.SPECIES())){
5036
            handleSpeciesHierarchy(ref, myName, parser);
5037
        }
5038
        else if (taxonNameBase.getRank().equals(Rank.SUBGENUS())){
5039
            handleSubgenusHierarchy(ref, myName, parser);
5040
        }
5041

    
5042
        if (taxonNameBase.getRank().equals(Rank.GENUS())){
5043
            handleGenusHierarchy(ref, myName, parser);
5044
        }
5045
        if (taxonNameBase.getRank().equals(Rank.SUBTRIBE())){
5046
            handleSubtribeHierarchy(ref, myName, parser);
5047
        }
5048
        if (taxonNameBase.getRank().equals(Rank.TRIBE())){
5049
            handleTribeHierarchy(ref, myName, parser);
5050
        }
5051

    
5052
        if (taxonNameBase.getRank().equals(Rank.SUBFAMILY())){
5053
            handleSubfamilyHierarchy(ref, myName, parser);
5054
        }
5055
    }
5056

    
5057
    /**
5058
     * @param ref
5059
     * @param myName
5060
     * @param parser
5061
     */
5062
    private void handleSubfamilyHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5063
        System.out.println("handleSubfamilyHierarchy");
5064
        String parentStr = myName.getFamilyStr();
5065
        Rank r = Rank.FAMILY();
5066
        if(parentStr!=null){
5067

    
5068
            Taxon parent = null;
5069
            Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, parentStr, MatchMode.BEGINNING, null, null, null, null, null);
5070
            for(TaxonBase tb:taxontest.getRecords()){
5071
                try {
5072
                    if (tb.getName().getRank().equals(r)) {
5073
                        parent=CdmBase.deproxy(tb, Taxon.class);
5074
                    }
5075
                    break;
5076
                } catch (Exception e) {
5077
                    // TODO Auto-generated catch block
5078
                    e.printStackTrace();
5079
                }
5080
            }
5081
            if(parent == null) {
5082
                NonViralName<?> parentNameName =  (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5083
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5084
                if(tmp ==null)
5085
                {
5086
                    parent=Taxon.NewInstance(parentNameName, ref);
5087
                    importer.getTaxonService().save(parent);
5088
                    parent = CdmBase.deproxy(parent, Taxon.class);
5089
                } else {
5090
                    parent=tmp;
5091
                }
5092
                lookForParentNode(parentNameName, parent, ref,myName);
5093

    
5094
            }
5095
            hierarchy.put(r,parent);
5096
        }
5097
    }
5098

    
5099
    /**
5100
     * @param ref
5101
     * @param myName
5102
     * @param parser
5103
     */
5104
    private void handleTribeHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5105
        String parentStr = myName.getSubfamilyStr();
5106
        Rank r = Rank.SUBFAMILY();
5107
        if (parentStr == null){
5108
            parentStr = myName.getFamilyStr();
5109
            r = Rank.FAMILY();
5110
        }
5111
        if(parentStr!=null){
5112
            NonViralName<?> parentNameName =  (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5113
            Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5114
            //                    importer.getTaxonService().save(parent);
5115
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5116

    
5117
            boolean parentDoesNotExists = true;
5118
            for (TaxonNode p : classification.getAllNodes()){
5119
                if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5120
                    parentDoesNotExists = false;
5121
                    parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5122
                    break;
5123
                }
5124
            }
5125
            //                if(parentDoesNotExists) {
5126
            //                    importer.getTaxonService().save(parent);
5127
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5128
            //                    lookForParentNode(parentNameName, parent, ref,myName);
5129
            //                }
5130
            if(parentDoesNotExists) {
5131
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5132
                if(tmp ==null)
5133
                {
5134
                    parent=Taxon.NewInstance(parentNameName, ref);
5135
                    importer.getTaxonService().save(parent);
5136
                    parent = CdmBase.deproxy(parent, Taxon.class);
5137
                } else {
5138
                    parent=tmp;
5139
                }
5140
                lookForParentNode(parentNameName, parent, ref,myName);
5141

    
5142
            }
5143
            hierarchy.put(r,parent);
5144
        }
5145
    }
5146

    
5147
    /**
5148
     * @param ref
5149
     * @param myName
5150
     * @param parser
5151
     */
5152
    private void handleSubtribeHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5153
        String parentStr = myName.getTribeStr();
5154
        Rank r = Rank.TRIBE();
5155
        if (parentStr == null){
5156
            parentStr = myName.getSubfamilyStr();
5157
            r = Rank.SUBFAMILY();
5158
        }
5159
        if (parentStr == null){
5160
            parentStr = myName.getFamilyStr();
5161
            r = Rank.FAMILY();
5162
        }
5163
        if(parentStr!=null){
5164
            NonViralName<?> parentNameName =  (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5165
            Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5166
            //                    importer.getTaxonService().save(parent);
5167
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5168

    
5169
            boolean parentDoesNotExists = true;
5170
            for (TaxonNode p : classification.getAllNodes()){
5171
                if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5172
                    parentDoesNotExists = false;
5173
                    parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5174

    
5175
                    break;
5176
                }
5177
            }
5178
            //                if(parentDoesNotExists) {
5179
            //                    importer.getTaxonService().save(parent);
5180
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5181
            //                    lookForParentNode(parentNameName, parent, ref,myName);
5182
            //                }
5183
            if(parentDoesNotExists) {
5184
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5185
                if(tmp ==null)
5186
                {
5187
                    parent=Taxon.NewInstance(parentNameName, ref);
5188
                    importer.getTaxonService().save(parent);
5189
                    parent = CdmBase.deproxy(parent, Taxon.class);
5190
                } else {
5191
                    parent=tmp;
5192
                }
5193
                lookForParentNode(parentNameName, parent, ref,myName);
5194

    
5195
            }
5196
            hierarchy.put(r,parent);
5197
        }
5198
    }
5199

    
5200
    /**
5201
     * @param ref
5202
     * @param myName
5203
     * @param parser
5204
     */
5205
    private void handleGenusHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5206
        String parentStr = myName.getSubtribeStr();
5207
        Rank r = Rank.SUBTRIBE();
5208
        if (parentStr == null){
5209
            parentStr = myName.getTribeStr();
5210
            r = Rank.TRIBE();
5211
        }
5212
        if (parentStr == null){
5213
            parentStr = myName.getSubfamilyStr();
5214
            r = Rank.SUBFAMILY();
5215
        }
5216
        if (parentStr == null){
5217
            parentStr = myName.getFamilyStr();
5218
            r = Rank.FAMILY();
5219
        }
5220
        if(parentStr!=null){
5221
            NonViralName<?> parentNameName =  (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5222
            Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5223
            //                    importer.getTaxonService().save(parent);
5224
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5225

    
5226
            boolean parentDoesNotExist = true;
5227
            for (TaxonNode p : classification.getAllNodes()){
5228
                if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5229
                    //                        System.out.println(p.getTaxon().getUuid());
5230
                    //                        System.out.println(parent.getUuid());
5231
                    parentDoesNotExist = false;
5232
                    parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5233
                    break;
5234
                }
5235
            }
5236
            //                if(parentDoesNotExists) {
5237
            //                    importer.getTaxonService().save(parent);
5238
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5239
            //                    lookForParentNode(parentNameName, parent, ref,myName);
5240
            //                }
5241
            if(parentDoesNotExist) {
5242
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5243
                if(tmp ==null){
5244

    
5245
                    parent=Taxon.NewInstance(parentNameName, ref);
5246
                    importer.getTaxonService().save(parent);
5247
                    parent = CdmBase.deproxy(parent, Taxon.class);
5248
                } else {
5249
                    parent=tmp;
5250
                }
5251
                lookForParentNode(parentNameName, parent, ref,myName);
5252

    
5253
            }
5254
            hierarchy.put(r,parent);
5255
        }
5256
    }
5257

    
5258
    /**
5259
     * @param ref
5260
     * @param myName
5261
     * @param parser
5262
     */
5263
    private void handleSubgenusHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5264
        String parentStr = myName.getGenusStr();
5265
        Rank r = Rank.GENUS();
5266

    
5267
        if(parentStr==null){
5268
            parentStr = myName.getSubtribeStr();
5269
            r = Rank.SUBTRIBE();
5270
        }
5271
        if (parentStr == null){
5272
            parentStr = myName.getTribeStr();
5273
            r = Rank.TRIBE();
5274
        }
5275
        if (parentStr == null){
5276
            parentStr = myName.getSubfamilyStr();
5277
            r = Rank.SUBFAMILY();
5278
        }
5279
        if (parentStr == null){
5280
            parentStr = myName.getFamilyStr();
5281
            r = Rank.FAMILY();
5282
        }
5283
        if(parentStr!=null){
5284
            NonViralName<?> parentNameName =  (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5285
            Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5286
            //                    importer.getTaxonService().save(parent);
5287
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5288

    
5289
            boolean parentDoesNotExists = true;
5290
            for (TaxonNode p : classification.getAllNodes()){
5291
                if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5292
                    //                        System.out.println(p.getTaxon().getUuid());
5293
                    //                        System.out.println(parent.getUuid());
5294
                    parentDoesNotExists = false;
5295
                    parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5296
                    break;
5297
                }
5298
            }
5299
            //                if(parentDoesNotExists) {
5300
            //                    importer.getTaxonService().save(parent);
5301
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5302
            //                    lookForParentNode(parentNameName, parent, ref,myName);
5303
            //                }
5304
            if(parentDoesNotExists) {
5305
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5306
                if(tmp ==null)
5307
                {
5308
                    parent=Taxon.NewInstance(parentNameName, ref);
5309
                    importer.getTaxonService().save(parent);
5310
                    parent = CdmBase.deproxy(parent, Taxon.class);
5311
                } else {
5312
                    parent=tmp;
5313
                }
5314
                lookForParentNode(parentNameName, parent, ref,myName);
5315

    
5316
            }
5317
            hierarchy.put(r,parent);
5318
        }
5319
    }
5320

    
5321
    /**
5322
     * @param ref
5323
     * @param myName
5324
     * @param parser
5325
     */
5326
    private void handleSpeciesHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5327
        String parentStr = myName.getSubgenusStr();
5328
        Rank r = Rank.SUBGENUS();
5329

    
5330
        if(parentStr==null){
5331
            parentStr = myName.getGenusStr();
5332
            r = Rank.GENUS();
5333
        }
5334

    
5335
        if(parentStr==null){
5336
            parentStr = myName.getSubtribeStr();
5337
            r = Rank.SUBTRIBE();
5338
        }
5339
        if (parentStr == null){
5340
            parentStr = myName.getTribeStr();
5341
            r = Rank.TRIBE();
5342
        }
5343
        if (parentStr == null){
5344
            parentStr = myName.getSubfamilyStr();
5345
            r = Rank.SUBFAMILY();
5346
        }
5347
        if (parentStr == null){
5348
            parentStr = myName.getFamilyStr();
5349
            r = Rank.FAMILY();
5350
        }
5351
        if(parentStr!=null){
5352
            Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5353
            //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5354
            hierarchy.put(r,parent);
5355
        }
5356
    }
5357

    
5358
    /**
5359
     * @param ref
5360
     * @param myName
5361
     * @param parser
5362
     */
5363
    private void handleSubSpeciesHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5364
        String parentStr = myName.getSpeciesStr();
5365
        Rank r = Rank.SPECIES();
5366

    
5367

    
5368
        if(parentStr==null){
5369
            parentStr = myName.getSubgenusStr();
5370
            r = Rank.SUBGENUS();
5371
        }
5372

    
5373
        if(parentStr==null){
5374
            parentStr = myName.getGenusStr();
5375
            r = Rank.GENUS();
5376
        }
5377

    
5378
        if(parentStr==null){
5379
            parentStr = myName.getSubtribeStr();
5380
            r = Rank.SUBTRIBE();
5381
        }
5382
        if (parentStr == null){
5383
            parentStr = myName.getTribeStr();
5384
            r = Rank.TRIBE();
5385
        }
5386
        if (parentStr == null){
5387
            parentStr = myName.getSubfamilyStr();
5388
            r = Rank.SUBFAMILY();
5389
        }
5390
        if (parentStr == null){
5391
            parentStr = myName.getFamilyStr();
5392
            r = Rank.FAMILY();
5393
        }
5394
        if(parentStr!=null){
5395
            Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5396
            //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5397
            hierarchy.put(r,parent);
5398
        }
5399
    }
5400

    
5401

    
5402
    /**
5403
     * @param ref
5404
     * @param myName
5405
     * @param parser
5406
     */
5407
    private void handleFormHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5408
        String parentStr = myName.getSubspeciesStr();
5409
        Rank r = Rank.SUBSPECIES();
5410

    
5411

    
5412
        if(parentStr==null){
5413
            parentStr = myName.getSpeciesStr();
5414
            r = Rank.SPECIES();
5415
        }
5416

    
5417
        if(parentStr==null){
5418
            parentStr = myName.getSubgenusStr();
5419
            r = Rank.SUBGENUS();
5420
        }
5421

    
5422
        if(parentStr==null){
5423
            parentStr = myName.getGenusStr();
5424
            r = Rank.GENUS();
5425
        }
5426

    
5427
        if(parentStr==null){
5428
            parentStr = myName.getSubtribeStr();
5429
            r = Rank.SUBTRIBE();
5430
        }
5431
        if (parentStr == null){
5432
            parentStr = myName.getTribeStr();
5433
            r = Rank.TRIBE();
5434
        }
5435
        if (parentStr == null){
5436
            parentStr = myName.getSubfamilyStr();
5437
            r = Rank.SUBFAMILY();
5438
        }
5439
        if (parentStr == null){
5440
            parentStr = myName.getFamilyStr();
5441
            r = Rank.FAMILY();
5442
        }
5443
        if(parentStr!=null){
5444
            Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5445
            //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5446
            hierarchy.put(r,parent);
5447
        }
5448
    }
5449

    
5450
    /**
5451
     * @param ref
5452
     * @param myName
5453
     * @param parser
5454
     */
5455
    private void handleVarietyHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5456
        String parentStr = myName.getSubspeciesStr();
5457
        Rank r = Rank.SUBSPECIES();
5458

    
5459
        if(parentStr==null){
5460
            parentStr = myName.getSpeciesStr();
5461
            r = Rank.SPECIES();
5462
        }
5463

    
5464
        if(parentStr==null){
5465
            parentStr = myName.getSubgenusStr();
5466
            r = Rank.SUBGENUS();
5467
        }
5468

    
5469
        if(parentStr==null){
5470
            parentStr = myName.getGenusStr();
5471
            r = Rank.GENUS();
5472
        }
5473

    
5474
        if(parentStr==null){
5475
            parentStr = myName.getSubtribeStr();
5476
            r = Rank.SUBTRIBE();
5477
        }
5478
        if (parentStr == null){
5479
            parentStr = myName.getTribeStr();
5480
            r = Rank.TRIBE();
5481
        }
5482
        if (parentStr == null){
5483
            parentStr = myName.getSubfamilyStr();
5484
            r = Rank.SUBFAMILY();
5485
        }
5486
        if (parentStr == null){
5487
            parentStr = myName.getFamilyStr();
5488
            r = Rank.FAMILY();
5489
        }
5490
        if(parentStr!=null){
5491
            Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5492
            //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5493
            hierarchy.put(r,parent);
5494
        }
5495
    }
5496

    
5497
    /**
5498
     * @param ref
5499
     * @param myName
5500
     * @param parser
5501
     * @param parentStr
5502
     * @param r
5503
     * @return
5504
     */
5505
    private Taxon handleParentName(Reference ref, MyName myName, INonViralNameParser<?> parser, String parentStr, Rank r) {
5506
        NonViralName<?> parentNameName =  (NonViralName<?>) parser.parseFullName(parentStr, nomenclaturalCode, r);
5507
        Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5508
        //                    importer.getTaxonService().save(parent);
5509
        //                    parent = CdmBase.deproxy(parent, Taxon.class);
5510

    
5511
        boolean parentDoesNotExists = true;
5512
        for (TaxonNode p : classification.getAllNodes()){
5513
            if(p.getTaxon().getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(parent.getTitleCache().split("sec.")[0].trim())) {
5514
                //                        System.out.println(p.getTaxon().getUuid());
5515
                //                        System.out.println(parent.getUuid());
5516
                parentDoesNotExists = false;
5517
                parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5518
                break;
5519
            }
5520
        }
5521
        if(parentDoesNotExists) {
5522
            Taxon tmp = findMatchingTaxon(parentNameName,ref);
5523
            //                    System.out.println("FOUND PARENT "+tmp.toString()+" for "+parentNameName.toString());
5524
            if(tmp ==null){
5525

    
5526
                parent=Taxon.NewInstance(parentNameName, ref);
5527
                importer.getTaxonService().save(parent);
5528

    
5529
            } else {
5530
                parent=tmp;
5531
            }
5532
            lookForParentNode(parentNameName, parent, ref,myName);
5533

    
5534
        }
5535
        return parent;
5536
    }
5537

    
5538
    private void addNameDifferenceToFile(String originalname, String atomisedname){
5539
        try{
5540
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NamesDifferent_"+classification.getTitleCache()+".txt",true);
5541
            BufferedWriter out = new BufferedWriter(fstream);
5542
            out.write(originalname+" (original) versus "+replaceNull(atomisedname)+" (atomised) \n");
5543
            //Close the output stream
5544
            out.close();
5545
        }catch (Exception e){//Catch exception if any
5546
            System.err.println("Error: " + e.getMessage());
5547
        }
5548
    }
5549
    /**
5550
     * @param name
5551
     * @param author
5552
     * @param nomenclaturalCode2
5553
     * @param rank
5554
     */
5555
    private void addProblemNameToFile(String name, String author, NomenclaturalCode nomenclaturalCode2, Rank rank) {
5556
        try{
5557
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NameNotParsed.txt",true);
5558
            BufferedWriter out = new BufferedWriter(fstream);
5559
            out.write(name+"\t"+replaceNull(author)+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\n");
5560
            //Close the output stream
5561
            out.close();
5562
        }catch (Exception e){//Catch exception if any
5563
            System.err.println("Error: " + e.getMessage());
5564
        }
5565
    }
5566

    
5567

    
5568
    /**
5569
     * @param tnb
5570
     * @param bestMatchingTaxon
5571
     * @param insertAsExisting
5572
     * @param refMods
5573
     */
5574
    private void logDecision(INonViralName tnb, Taxon bestMatchingTaxon, boolean insertAsExisting, Reference refMods) {
5575
        try{
5576
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "Decisions_"+classification.toString()+".txt", true);
5577
            BufferedWriter out = new BufferedWriter(fstream);
5578
            out.write(tnb.getTitleCache() + " sec. " + refMods + "\t" + bestMatchingTaxon.getTitleCache() + "\t" + insertAsExisting + "\n");
5579
            //Close the output stream
5580
            out.close();
5581
        }catch (Exception e){//Catch exception if any
5582
            System.err.println("Error: " + e.getMessage());
5583
        }
5584
    }
5585

    
5586

    
5587
    @SuppressWarnings("unused")
5588
    private String replaceNull(Object in){
5589
        if (in == null) {
5590
            return "";
5591
        }
5592
        if (in.getClass().equals(NomenclaturalCode.class)) {
5593
            return ((NomenclaturalCode)in).getTitleCache();
5594
        }
5595
        return in.toString();
5596
    }
5597

    
5598
    /**
5599
     * @param fullName
5600
     * @param nomenclaturalCode2
5601
     * @param rank
5602
     */
5603
    private void addProblemNameToFile(String type, String name, NomenclaturalCode nomenclaturalCode2, Rank rank, String problems) {
5604
        try{
5605
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NameNotParsed_"+classification.getTitleCache()+".txt",true);
5606
            BufferedWriter out = new BufferedWriter(fstream);
5607
            out.write(type+"\t"+name+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\t"+problems+"\n");
5608
            //Close the output stream
5609
            out.close();
5610
        }catch (Exception e){//Catch exception if any
5611
            System.err.println("Error: " + e.getMessage());
5612
        }
5613

    
5614
    }
5615

    
5616
}
5617

    
5618

    
5619

    
(8-8/9)