Project

General

Profile

Download (232 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
 * Copyright (C) 2013 EDIT
3
 * European Distributed Institute of Taxonomy
4
 * http://www.e-taxonomy.eu
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9
package eu.etaxonomy.cdm.io.taxonx2013;
10

    
11
import java.io.BufferedWriter;
12
import java.io.File;
13
import java.io.FileWriter;
14
import java.io.IOException;
15
import java.net.URI;
16
import java.util.ArrayList;
17
import java.util.Arrays;
18
import java.util.HashMap;
19
import java.util.List;
20
import java.util.Map;
21
import java.util.Set;
22
import java.util.UUID;
23
import java.util.regex.Matcher;
24
import java.util.regex.Pattern;
25

    
26
import javax.xml.transform.TransformerException;
27
import javax.xml.transform.TransformerFactoryConfigurationError;
28

    
29
import org.apache.commons.lang.StringUtils;
30
import org.apache.log4j.Logger;
31
import org.w3c.dom.Node;
32
import org.w3c.dom.NodeList;
33

    
34
import com.ibm.lsid.MalformedLSIDException;
35

    
36
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
37
import eu.etaxonomy.cdm.api.service.pager.Pager;
38
import eu.etaxonomy.cdm.model.agent.AgentBase;
39
import eu.etaxonomy.cdm.model.agent.Person;
40
import eu.etaxonomy.cdm.model.common.CdmBase;
41
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
42
import eu.etaxonomy.cdm.model.common.LSID;
43
import eu.etaxonomy.cdm.model.common.Language;
44
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
45
import eu.etaxonomy.cdm.model.description.Feature;
46
import eu.etaxonomy.cdm.model.description.FeatureNode;
47
import eu.etaxonomy.cdm.model.description.FeatureTree;
48
import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
49
import eu.etaxonomy.cdm.model.description.TaxonDescription;
50
import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
51
import eu.etaxonomy.cdm.model.description.TextData;
52
import eu.etaxonomy.cdm.model.name.INonViralName;
53
import eu.etaxonomy.cdm.model.name.ITaxonNameBase;
54
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
55
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
56
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
57
import eu.etaxonomy.cdm.model.name.Rank;
58
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
59
import eu.etaxonomy.cdm.model.name.ZoologicalName;
60
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
61
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
62
import eu.etaxonomy.cdm.model.reference.Reference;
63
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
64
import eu.etaxonomy.cdm.model.taxon.Classification;
65
import eu.etaxonomy.cdm.model.taxon.Synonym;
66
import eu.etaxonomy.cdm.model.taxon.SynonymType;
67
import eu.etaxonomy.cdm.model.taxon.Taxon;
68
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
69
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
70
import eu.etaxonomy.cdm.persistence.dto.UuidAndTitleCache;
71
import eu.etaxonomy.cdm.persistence.query.MatchMode;
72
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
73
import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
74
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
75
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImplRegExBase;
76

    
77
/**
78
 * @author pkelbert
79
 * @date 2 Apr. 2013
80
 *
81
 */
82
public class TaxonXTreatmentExtractor extends TaxonXExtractor{
83

    
84
    private static final String PUBLICATION_YEAR = "publicationYear";
85

    
86
	private static final Logger logger = Logger.getLogger(TaxonXTreatmentExtractor.class);
87

    
88
    private static final String notMarkedUp = "Not marked-up";
89
    private static final UUID proIbioTreeUUID = UUID.fromString("2c49f506-c7f7-44de-a8b9-2e695de3769c");
90
    private static final UUID OtherUUID = UUID.fromString("6465f8aa-2175-446f-807e-7163994b120f");
91
    private static final UUID NotMarkedUpUUID = UUID.fromString("796fe3a5-2c9c-4a89-b298-7598ca944063");
92
    private static final boolean skippQuestion = true;
93

    
94
    private final NomenclaturalCode nomenclaturalCode;
95
    private Classification classification;
96

    
97
    private  String treatmentMainName,originalTreatmentName;
98

    
99
    private final HashMap<String,Map<String,String>> namesMap = new HashMap<String, Map<String,String>>();
100

    
101

    
102
    private final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
103
    private final Pattern keypatternend = Pattern.compile("^.+?\\d$");
104

    
105
    private boolean maxRankRespected =false;
106
    private Map<String, Feature> featuresMap;
107

    
108
    private MyName currentMyName;
109

    
110
    private Reference sourceUrlRef;
111

    
112
    private String followingText;  //text element immediately following a tax:name in tax:nomenclature TODO move do state
113
    private String usedFollowingTextPrefix; //the part of the following text which has been used during taxon name creation
114

    
115
    private final TaxonXAddSources sourceHandler = new TaxonXAddSources();
116

    
117
    /**
     * Creates an extractor and wires it to the given importer and import state.
     *
     * @param nomenclaturalCode the nomenclatural code kept by this extractor
     * @param classification the classification kept by this extractor
     * @param importer the importer; its agent service is passed to {@code prepareCollectors}
     * @param configState the import state, passed to {@code prepareCollectors} and to the source handler
     * @param featuresMap the features map kept by this extractor
     * @param urlSource the reference kept as source URL reference and passed to the source handler
     */
    public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification, TaxonXImport importer,
            TaxonXImportState configState,Map<String, Feature> featuresMap,  Reference urlSource) {
        // plain state first
        this.importer = importer;
        this.state2 = configState;
        this.nomenclaturalCode = nomenclaturalCode;
        this.classification = classification;
        this.featuresMap = featuresMap;
        this.sourceUrlRef = urlSource;

        prepareCollectors(configState, importer.getAgentService());

        // the source handler shares reference, importer and state with this extractor
        sourceHandler.setSourceUrlRef(urlSource);
        sourceHandler.setImporter(importer);
        sourceHandler.setConfigState(configState);
    }
136

    
137
    /**
138
     * extracts all the treament information and save them
139
     * @param treatmentnode: the XML Node
140
     * @param tosave: the list of object to save into the CDM
141
     * @param refMods: the reference extracted from the MODS
142
     * @param sourceName: the URI of the document
143
     */
144
    @SuppressWarnings({ "rawtypes", "unused" })
145

    
146
    protected void extractTreatment(Node treatmentnode, Reference refMods, URI sourceName) {        logger.info("extractTreatment");
147
        List<TaxonNameBase> namesToSave = new ArrayList<TaxonNameBase>();
148
        NodeList children = treatmentnode.getChildNodes();
149
        Taxon acceptedTaxon =null;
150
        boolean hasRefgroup=false;
151

    
152
        //needed?
153
        for (int i=0;i<children.getLength();i++){
154
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group")) {
155
            	hasRefgroup=true;
156
            }
157
        }
158

    
159
        for (int i=0;i<children.getLength();i++){
160
        	Node child = children.item(i);
161
    		acceptedTaxon = handleSingleNode(refMods, sourceName, namesToSave, child, acceptedTaxon);
162
        }
163
        //        logger.info("saveUpdateNames");
164
        if (maxRankRespected){
165
            importer.getNameService().saveOrUpdate(namesToSave);
166
            importer.getClassificationService().saveOrUpdate(classification);
167
            //logger.info("saveUpdateNames-ok");
168
        }
169

    
170
        buildFeatureTree();
171
    }
172

    
173
	private Taxon handleSingleNode(Reference refMods, URI sourceName,
174
			List<TaxonNameBase> namesToSave, Node child, Taxon acceptedTaxon) {
175
		Taxon defaultTaxon =null;
176

    
177
		String nodeName = child.getNodeName();
178
		if (nodeName.equalsIgnoreCase("tax:nomenclature")){
179
		    NodeList nomenclatureChildren = child.getChildNodes();
180
		    boolean containsName = false;
181
		    for(int k=0; k<nomenclatureChildren.getLength(); k++){
182
		        if(nomenclatureChildren.item(k).getNodeName().equalsIgnoreCase("tax:name")){
183
		            containsName=true;
184
		            break;
185
		        }
186
		    }
187
		    if (containsName){
188
		        reloadClassification();
189
		        //extract "main" the scientific name
190
		        try{
191
		            acceptedTaxon = extractNomenclature(child, namesToSave, refMods);
192
		        }catch(ClassCastException e){
193
		        	//FIXME exception handling
194
		        	e.printStackTrace();
195
		        }
196
		        //                    System.out.println("acceptedTaxon : "+acceptedTaxon);
197
		    }
198
		}else if (nodeName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
199
		    reloadClassification();
200
		    //extract the References within the document
201
		    extractReferences(child, namesToSave ,acceptedTaxon,refMods);
202
		}else if (nodeName.equalsIgnoreCase("tax:div") &&
203
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected){
204
		    File file = new File(TaxonXImport.LOG_FOLDER + "multipleTaxonX.txt");
205
		    FileWriter writer;
206
		    try {
207
		        writer = new FileWriter(file ,true);
208
		        writer.write(sourceName+"\n");
209
		        writer.flush();
210
		        writer.close();
211
		    } catch (IOException e1) {
212
		        // TODO Auto-generated catch block
213
		        logger.error(e1.getMessage());
214
		    }
215
		    //                String multiple = askMultiple(children.item(i));
216
		    String multiple = "Other";
217
		    if (multiple.equalsIgnoreCase("other")) {
218
		        extractSpecificFeatureNotStructured(child,acceptedTaxon, defaultTaxon,namesToSave, refMods,multiple);
219
		    }else if (multiple.equalsIgnoreCase("synonyms")) {
220
		        try{
221
		            extractSynonyms(child,acceptedTaxon, refMods, null);
222
		        }catch(NullPointerException e){
223
		            logger.warn("the accepted taxon is maybe null");
224
		        }
225
		    }else if(multiple.equalsIgnoreCase("material examined")){
226
		    	extractMaterials(child, acceptedTaxon, refMods, namesToSave);
227
		    }else if (multiple.equalsIgnoreCase("distribution")){
228
		    	extractDistribution(child, acceptedTaxon, defaultTaxon, namesToSave, refMods);
229
		    }else if (multiple.equalsIgnoreCase("type status")){
230
		    	extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, "TypeStatus");
231
		    }else if (multiple.equalsIgnoreCase("vernacular name")){
232
		    	extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
233
		    }else{
234
		    	extractSpecificFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,multiple);
235
		    }
236
		}
237
		else if(nodeName.equalsIgnoreCase("tax:div") &&
238
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected){
239
		    extractFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, Feature.BIOLOGY_ECOLOGY());
240
		}
241
		else if(nodeName.equalsIgnoreCase("tax:div") &&
242
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("vernacularName") && maxRankRespected){
243
		    extractDescriptionWithReference(child, acceptedTaxon,defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
244
		}
245
		else if(nodeName.equalsIgnoreCase("tax:div") &&
246
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected){
247
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
248
		}
249
		else if(nodeName.equalsIgnoreCase("tax:div") &&
250
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected){
251
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,Feature.DIAGNOSIS());
252
		}
253
		else if(nodeName.equalsIgnoreCase("tax:div") &&
254
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected){
255
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DISCUSSION());
256
		}
257
		else if(nodeName.equalsIgnoreCase("tax:div") &&
258
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("note") && maxRankRespected){
259
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
260
		}
261
		else if(nodeName.equalsIgnoreCase("tax:div") &&
262
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected){
263
		    extractDistribution(child,acceptedTaxon,defaultTaxon,namesToSave, refMods);
264
		}
265
		else if(nodeName.equalsIgnoreCase("tax:div") &&
266
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected){
267
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave,refMods,Feature.ETYMOLOGY());
268
		}
269
		else if(nodeName.equalsIgnoreCase("tax:div") &&
270
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected){
271
		    extractMaterials(child,acceptedTaxon, refMods, namesToSave);
272
		}
273
		else if(nodeName.equalsIgnoreCase("tax:figure") && maxRankRespected){
274
		    extractSpecificFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, "Figure");
275
		}
276
		else if(nodeName.equalsIgnoreCase("tax:div") &&
277
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") && maxRankRespected){
278
		    extractSpecificFeature(child, acceptedTaxon,defaultTaxon, namesToSave, refMods, "table");
279
		}else if(nodeName.equalsIgnoreCase("tax:div") &&
280
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected){
281
		    //TODO IGNORE keys for the moment
282
		    //extractKey(children.item(i),acceptedTaxon, nameToSave,source, refMods);
283
		    extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,"Keys - unparsed");
284
		}
285
		else{
286
		    if (! nodeName.equalsIgnoreCase("tax:pb")){
287
		        //logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
288
		        if (child.getAttributes() !=null) {
289
		            logger.info("First Attribute: " + child.getAttributes().item(0));
290
		        }
291
		        extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, notMarkedUp);
292
		    }else{
293
		    	//FIXME
294
		    	logger.warn("Unhandled");
295
		    }
296
		}
297
		return acceptedTaxon;
298
	}
299

    
300

    
301
    protected Map<String,Feature> getFeaturesUsed(){
302
        return featuresMap;
303
    }
304
    /**
305
     *
306
     */
307
    private void buildFeatureTree() {
308
        logger.info("buildFeatureTree");
309
        FeatureTree proibiospheretree = importer.getFeatureTreeService().find(proIbioTreeUUID);
310
        if (proibiospheretree == null){
311
            List<FeatureTree> trees = importer.getFeatureTreeService().list(FeatureTree.class, null, null, null, null);
312
            if (trees.size()==1) {
313
                FeatureTree ft = trees.get(0);
314
                if (featuresMap==null) {
315
                    featuresMap=new HashMap<String, Feature>();
316
                }
317
                for (Feature feature: ft.getDistinctFeatures()){
318
                    if(feature!=null) {
319
                        featuresMap.put(feature.getTitleCache(), feature);
320
                    }
321
                }
322
            }
323
            proibiospheretree = FeatureTree.NewInstance();
324
            proibiospheretree.setUuid(proIbioTreeUUID);
325
        }
326
        //        FeatureNode root = proibiospheretree.getRoot();
327
        FeatureNode root2 = proibiospheretree.getRoot();
328
        if (root2 != null){
329
            int nbChildren = root2.getChildCount()-1;
330
            while (nbChildren>-1){
331
                try{
332
                    root2.removeChild(nbChildren);
333
                }catch(Exception e){logger.warn("Can't remove child from FeatureTree "+e);}
334
                nbChildren --;
335
            }
336

    
337
        }
338

    
339
        for (Feature feature:featuresMap.values()) {
340
            root2.addChild(FeatureNode.NewInstance(feature));
341
        }
342
        importer.getFeatureTreeService().saveOrUpdate(proibiospheretree);
343

    
344
    }
345

    
346

    
347
    /**
348
     * @param keys
349
     * @param acceptedTaxon: the current acceptedTaxon
350
     * @param nametosave: the list of objects to save into the CDM
351
     * @param refMods: the current reference extracted from the MODS
352
     */
353
    /*   @SuppressWarnings("rawtypes")
354
    private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonNameBase> nametosave, Reference refMods) {
355
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
356

    
357
        NodeList children = keys.getChildNodes();
358
        String key="";
359
        PolytomousKey poly =  PolytomousKey.NewInstance();
360
        poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
361
        poly.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
362
        poly.addTaxonomicScope(acceptedTaxon);
363
        poly.setTitleCache("bloup", true);
364
        //        poly.addCoveredTaxon(acceptedTaxon);
365
        PolytomousKeyNode root = poly.getRoot();
366
        PolytomousKeyNode previous = null,tmpKey=null;
367
        Taxon taxonKey=null;
368
        List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
369

    
370
        //        String fullContent = keys.getTextContent();
371
        for (int i=0;i<children.getLength();i++){
372
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
373
                NodeList paragraph = children.item(i).getChildNodes();
374
                key="";
375
                taxonKey=null;
376
                for (int j=0;j<paragraph.getLength();j++){
377
                    if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
378
                        if (! paragraph.item(j).getTextContent().trim().isEmpty()){
379
                            key+=paragraph.item(j).getTextContent().trim();
380
                            //                            logger.info("KEY: "+j+"--"+key);
381
                        }
382
                    }
383
                    if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
384
                        taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
385
                    }
386
                }
387
                //                logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
388
                if (keypattern.matcher(key).matches()){
389
                    tmpKey = PolytomousKeyNode.NewInstance(key);
390
                    if (taxonKey!=null) {
391
                        tmpKey.setTaxon(taxonKey);
392
                    }
393
                    polyNodes.add(tmpKey);
394
                    if (previous == null) {
395
                        root.addChild(tmpKey);
396
                    } else {
397
                        previous.addChild(tmpKey);
398
                    }
399
                }else{
400
                    if (!key.isEmpty()){
401
                        tmpKey=PolytomousKeyNode.NewInstance(key);
402
                        if (taxonKey!=null) {
403
                            tmpKey.setTaxon(taxonKey);
404
                        }
405
                        polyNodes.add(tmpKey);
406
                        if (keypatternend.matcher(key).matches()) {
407
                            root.addChild(tmpKey);
408
                            previous=tmpKey;
409
                        } else{
410
                            previous.addChild(tmpKey);
411
                        }
412

    
413
                    }
414
                }
415
            }
416
        }
417
        importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
418
        importer.getPolytomousKeyService().saveOrUpdate(poly);
419
    }
420
*/
421

    
422

    
423
    /**
424
     * @param taxons: the XML Nodegroup
425
     * @param nametosave: the list of objects to save into the CDM
426
     * @param acceptedTaxon: the current accepted Taxon
427
     * @param refMods: the current reference extracted from the MODS
428
     *
429
     * @return Taxon object built
430
     */
431
    @SuppressWarnings({ "rawtypes", "unused" })
432
    private TaxonNameBase getTaxonNameBaseFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference refMods, boolean isSynonym) {
433
        //        logger.info("getTaxonFromXML");
434
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
435
        logger.info("getTaxonNameBaseFromXML");
436
        TaxonNameBase nameToBeFilled = null;
437

    
438
        currentMyName=new MyName(isSynonym);
439

    
440
        NomenclaturalStatusType statusType = null;
441
        try {
442
        	String followingText = null;  //needs to be checked if following text is possible
443
            currentMyName = extractScientificName(taxons,refMods, null);
444
        } catch (TransformerFactoryConfigurationError e1) {
445
            logger.warn(e1);
446
        } catch (TransformerException e1) {
447
            logger.warn(e1);
448
        }
449
        /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
450

    
451
        nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
452
        if (nameToBeFilled.hasProblem() &&
453
                !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
454
            //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
455
            addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
456
            nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
457
        }
458

    
459
        nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
460
         */
461
        nameToBeFilled = currentMyName.getTaxonNameBase();
462
        return nameToBeFilled;
463

    
464
    }
465

    
466

    
467
    /**
468
     *
469
     */
470
    private void reloadClassification() {
471
        logger.info("reloadClassification");
472
        Classification cl = importer.getClassificationService().find(classification.getUuid());
473
        if (cl != null){
474
            classification = cl;
475
        }else{
476
            importer.getClassificationService().saveOrUpdate(classification);
477
            classification = importer.getClassificationService().find(classification.getUuid());
478
        }
479
    }
480

    
481
    //    /**
482
    //     * Create a Taxon for the current NameBase, based on the current reference
483
    //     * @param taxonNameBase
484
    //     * @param refMods: the current reference extracted from the MODS
485
    //     * @return Taxon
486
    //     */
487
    //    @SuppressWarnings({ "unused", "rawtypes" })
488
    //    private Taxon getTaxon(TaxonNameBase taxonNameBase, Reference refMods) {
489
    //        Taxon t = new Taxon(taxonNameBase,null );
490
    //        if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
491
    //            t.setSec(configState.getConfig().getSecundum());
492
    //            logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
493
    //        }
494
    //        /*<<<<<<< .courant
495
    //        boolean sourceExists=false;
496
    //        Set<IdentifiableSource> sources = t.getSources();
497
    //        for (IdentifiableSource src : sources){
498
    //            String micro = src.getCitationMicroReference();
499
    //            Reference r = src.getCitation();
500
    //            if (r.equals(refMods) && micro == null) {
501
    //                sourceExists=true;
502
    //            }
503
    //        }
504
    //        if(!sourceExists) {
505
    //            t.addSource(null,null,refMods,null);
506
    //        }
507
    //=======*/
508
    //        t.addSource(OriginalSourceType.Import,null,null,refMods,null);
509
    //        t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
510
    //        return t;
511
    //    }
512

    
513
    private void  extractDescriptionWithReference(Node typestatus, Taxon acceptedTaxon, Taxon defaultTaxon, Reference refMods,
514
            String featureName) {
515
        //        System.out.println("extractDescriptionWithReference !");
516
        logger.info("extractDescriptionWithReference");
517
        NodeList children = typestatus.getChildNodes();
518

    
519
        Feature currentFeature=getFeatureObjectFromString(featureName);
520

    
521
        String r="";String s="";
522
        for (int i=0;i<children.getLength();i++){
523
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
524
                s+=children.item(i).getTextContent().trim();
525
            }
526
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
527
                r+= children.item(i).getTextContent().trim();
528
            }
529
            if (s.indexOf(r)>-1) {
530
                s=s.split(r)[0];
531
            }
532
        }
533

    
534
        Reference currentref =  ReferenceFactory.newGeneric();
535
        if(!r.isEmpty()) {
536
            currentref.setTitleCache(r, true);
537
        } else {
538
            currentref=refMods;
539
        }
540
        setParticularDescription(s,acceptedTaxon,defaultTaxon, currentref, refMods,currentFeature);
541
    }
542

    
543
    /**
544
     * @param nametosave
545
     * @param distribution: the XML node group
546
     * @param acceptedTaxon: the current accepted Taxon
547
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
548
     * @param refMods: the current reference extracted from the MODS
549
     */
550
    @SuppressWarnings("rawtypes")
551
    private void extractDistribution(Node distribution, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference refMods) {
552
        logger.info("extractDistribution");
553
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
554
        NodeList children = distribution.getChildNodes();
555
        Map<Integer,List<MySpecimenOrObservation>> specimenOrObservations = new HashMap<Integer, List<MySpecimenOrObservation>>();
556
        Map<Integer,String> descriptionsFulltext = new HashMap<Integer,String>();
557

    
558
        for (int i=0;i<children.getLength();i++){
559
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
560
                NodeList paragraph = children.item(i).getChildNodes();
561
                for (int j=0;j<paragraph.getLength();j++){
562
                    if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
563
                        extractText(descriptionsFulltext, i, paragraph.item(j));
564
                    }
565
                    else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
566
                        extractInLine(nametosave, refMods, descriptionsFulltext, i,paragraph.item(j));
567
                    }
568
                    else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")){
569
                        MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
570
                        DerivedUnit derivedUnitBase = null;
571
                        specimenOrObservation = extractSpecimenOrObservation(paragraph.item(j), derivedUnitBase, SpecimenOrObservationType.DerivedUnit, null);
572
                        extractTextFromSpecimenOrObservation(specimenOrObservations, descriptionsFulltext, i, specimenOrObservation);
573
                    }
574
                }
575
            }
576
        }
577

    
578
        int m=0;
579
        for (int k:descriptionsFulltext.keySet()) {
580
            if (k>m) {
581
                m=k;
582
            }
583
        }
584
        for (int k:specimenOrObservations.keySet()) {
585
            if (k>m) {
586
                m=k;
587
            }
588
        }
589

    
590

    
591
        if(acceptedTaxon!=null){
592
            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
593
            Feature currentFeature = Feature.DISTRIBUTION();
594
            //        DerivedUnit derivedUnitBase=null;
595
            //        String descr="";
596
            for (int k=0;k<=m;k++){
597
                if(specimenOrObservations.keySet().contains(k)){
598
                    for (MySpecimenOrObservation soo:specimenOrObservations.get(k) ) {
599
                        handleAssociation(acceptedTaxon, refMods, td, soo);
600
                    }
601
                }
602

    
603
                if (descriptionsFulltext.keySet().contains(k)){
604
                    if (!stringIsEmpty(descriptionsFulltext.get(k).trim()) && (descriptionsFulltext.get(k).startsWith("Hab.") || descriptionsFulltext.get(k).startsWith("Habitat"))){
605
                        setParticularDescription(descriptionsFulltext.get(k),acceptedTaxon,defaultTaxon, refMods, Feature.HABITAT());
606
                        break;
607
                    }
608
                    else{
609
                        handleTextData(refMods, descriptionsFulltext, td, currentFeature, k);
610
                    }
611
                }
612

    
613
                if (descriptionsFulltext.keySet().contains(k) || specimenOrObservations.keySet().contains(k)){
614
                    acceptedTaxon.addDescription(td);
615
                    sourceHandler.addAndSaveSource(refMods, td, null);
616
                    importer.getTaxonService().saveOrUpdate(acceptedTaxon);
617
                }
618
            }
619
        }
620
    }
621

    
622
    /**
623
     * @param refMods
624
     * @param descriptionsFulltext
625
     * @param td
626
     * @param currentFeature
627
     * @param k
628
     */
629
    private void handleTextData(Reference refMods, Map<Integer, String> descriptionsFulltext, TaxonDescription td,
630
            Feature currentFeature, int k) {
631
        //logger.info("handleTextData");
632
        TextData textData = TextData.NewInstance();
633
        textData.setFeature(currentFeature);
634
        textData.putText(Language.UNKNOWN_LANGUAGE(), descriptionsFulltext.get(k));
635
        sourceHandler.addSource(refMods, textData);
636
        td.addElement(textData);
637
    }
638

    
639
    /**
640
     * @param acceptedTaxon
641
     * @param refMods
642
     * @param td
643
     * @param soo
644
     */
645
    private void handleAssociation(Taxon acceptedTaxon, Reference refMods, TaxonDescription td, MySpecimenOrObservation soo) {
646
        logger.info("handleAssociation");
647
        String descr=soo.getDescr();
648
        DerivedUnit derivedUnitBase = soo.getDerivedUnitBase();
649

    
650
        sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
651

    
652
        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
653

    
654
        Feature feature=null;
655
        feature = makeFeature(derivedUnitBase);
656
        if(!StringUtils.isEmpty(descr)) {
657
            derivedUnitBase.setTitleCache(descr, true);
658
        }
659

    
660
        IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
661

    
662
        taxonDescription.addElement(indAssociation);
663
        sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
664
        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
665
        td.setDescribedSpecimenOrObservation(soo.getDerivedUnitBase());
666
    }
667

    
668
    /**
669
     * create an individualAssociation
670
     * @param refMods
671
     * @param derivedUnitBase
672
     * @param feature
673
     * @return
674
     */
675
    private IndividualsAssociation createIndividualAssociation(Reference refMods, DerivedUnit derivedUnitBase,
676
            Feature feature) {
677
        logger.info("createIndividualAssociation");
678
        IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
679
        indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
680
        indAssociation.setFeature(feature);
681
        indAssociation = sourceHandler.addSource(refMods, indAssociation);
682
        return indAssociation;
683
    }
684

    
685
    /**
686
     * @param specimenOrObservations
687
     * @param descriptionsFulltext
688
     * @param i
689
     * @param specimenOrObservation
690
     */
691
    private void extractTextFromSpecimenOrObservation(Map<Integer, List<MySpecimenOrObservation>> specimenOrObservations,
692
            Map<Integer, String> descriptionsFulltext, int i, MySpecimenOrObservation specimenOrObservation) {
693
        logger.info("extractTextFromSpecimenOrObservation");
694
        List<MySpecimenOrObservation> speObsList = specimenOrObservations.get(i);
695
        if (speObsList == null) {
696
            speObsList=new ArrayList<MySpecimenOrObservation>();
697
        }
698
        speObsList.add(specimenOrObservation);
699
        specimenOrObservations.put(i,speObsList);
700

    
701
        String s = specimenOrObservation.getDerivedUnitBase().toString();
702
        if (descriptionsFulltext.get(i) !=null){
703
            s = descriptionsFulltext.get(i)+" "+s;
704
        }
705
        descriptionsFulltext.put(i, s);
706
    }
707

    
708
    /**
709
     * Extract the text with the inline link to a taxon
710
     * @param nametosave
711
     * @param refMods
712
     * @param descriptionsFulltext
713
     * @param i
714
     * @param paragraph
715
     */
716
    @SuppressWarnings("rawtypes")
717
    private void extractInLine(List<TaxonNameBase> nametosave, Reference refMods, Map<Integer, String> descriptionsFulltext,
718
            int i, Node paragraph) {
719
        //logger.info("extractInLine");
720
        String inLine=getInlineTextForName(nametosave, refMods, paragraph);
721
        if (descriptionsFulltext.get(i) !=null){
722
            inLine = descriptionsFulltext.get(i)+inLine;
723
        }
724
        descriptionsFulltext.put(i, inLine);
725
    }
726

    
727
    /**
728
     * Extract the raw text from a Node
729
     * @param descriptionsFulltext
730
     * @param node
731
     * @param j
732
     */
733
    private void extractText(Map<Integer, String> descriptionsFulltext, int i, Node node) {
734
        //logger.info("extractText");
735
        if(!node.getTextContent().trim().isEmpty()) {
736
            String s =node.getTextContent().trim();
737
            if (descriptionsFulltext.get(i) !=null){
738
                s = descriptionsFulltext.get(i)+" "+s;
739
            }
740
            descriptionsFulltext.put(i, s);
741
        }
742
    }
743

    
744

    
745
    /**
746
     * @param materials: the XML node group
747
     * @param acceptedTaxon: the current accepted Taxon
748
     * @param refMods: the current reference extracted from the MODS
749
     */
750
    @SuppressWarnings("rawtypes")
751
    private void extractMaterials(Node materials, Taxon acceptedTaxon, Reference refMods,List<TaxonNameBase> nametosave) {
752
        logger.info("EXTRACTMATERIALS");
753
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
754
        NodeList children = materials.getChildNodes();
755
        NodeList events = null;
756
        //        String descr="";
757

    
758

    
759
        for (int i=0;i<children.getLength();i++){
760
            String rawAssociation="";
761
            boolean added=false;
762
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
763
                events = children.item(i).getChildNodes();
764
                for(int k=0;k<events.getLength();k++){
765
                    if (events.item(k).getNodeName().equalsIgnoreCase("tax:name")){
766
                        String inLine= getInlineTextForName(nametosave, refMods, events.item(k));
767
                        if(!inLine.isEmpty()) {
768
                            rawAssociation+=inLine;
769
                        }
770
                    }
771
                    if (! events.item(k).getNodeName().equalsIgnoreCase("tax:name")
772
                            && !events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
773
                        rawAssociation+= events.item(k).getTextContent().trim();
774
                    }
775
                    if(events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
776
                        if (!containsDistinctLetters(rawAssociation.replaceAll(";",""))) {
777
                            rawAssociation="no description text";
778
                        }
779
                        added=true;
780
                        handleDerivedUnitFacadeAndBase(acceptedTaxon, refMods, events.item(k), rawAssociation);
781
                    }
782
                    if (!rawAssociation.isEmpty() && !added){
783

    
784
                        Feature feature = Feature.MATERIALS_EXAMINED();
785
                        featuresMap.put(feature.getTitleCache(),feature);
786

    
787
                        TextData textData = createTextData(rawAssociation, refMods, feature);
788

    
789
                        if(! rawAssociation.isEmpty() && (acceptedTaxon!=null)){
790
                            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
791
                            td.addElement(textData);
792
                            acceptedTaxon.addDescription(td);
793
                            sourceHandler.addAndSaveSource(refMods, td, null);
794
                        }
795
                        //                        DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
796
                        //                        derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
797
                        //
798
                        //                        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
799
                        //                        acceptedTaxon.addDescription(taxonDescription);
800
                        //
801
                        //                        IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
802
                        //
803
                        //                        Feature feature = Feature.MATERIALS_EXAMINED();
804
                        //                        featuresMap.put(feature.getTitleCache(),feature);
805
                        //                        if(!StringUtils.isEmpty(rawAssociation)) {
806
                        //                            derivedUnitBase.setTitleCache(rawAssociation, true);
807
                        //                        }
808
                        //                        indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
809
                        //                        indAssociation.setFeature(feature);
810
                        //                        indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
811
                        //
812
                        //                        /*boolean sourceExists=false;
813
                        //                        Set<DescriptionElementSource> dsources = indAssociation.getSources();
814
                        //                        for (DescriptionElementSource src : dsources){
815
                        //                            String micro = src.getCitationMicroReference();
816
                        //                            Reference r = src.getCitation();
817
                        //                            if (r.equals(refMods) && micro == null) {
818
                        //                                sourceExists=true;
819
                        //                            }
820
                        //                        }
821
                        //                        if(!sourceExists) {
822
                        //                            indAssociation.addSource(null, null, refMods, null);
823
                        //                        }*/
824
                        //                        taxonDescription.addElement(indAssociation);
825
                        //                        taxonDescription.setTaxon(acceptedTaxon);
826
                        //                        taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
827
                        //
828
                        //                        /*sourceExists=false;
829
                        //                        Set<IdentifiableSource> sources = taxonDescription.getSources();
830
                        //                        for (IdentifiableSource src : sources){
831
                        //                            String micro = src.getCitationMicroReference();
832
                        //                            Reference r = src.getCitation();
833
                        //                            if (r.equals(refMods) && micro == null) {
834
                        //                                sourceExists=true;
835
                        //                            }
836
                        //                        }
837
                        //                        if(!sourceExists) {
838
                        //                            taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
839
                        //                        }*/
840
                        //
841
                        //                        importer.getDescriptionService().saveOrUpdate(taxonDescription);
842
                        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
843

    
844
                        rawAssociation="";
845
                    }
846
                }
847
            }
848
        }
849
    }
850

    
851
    /**
852
     * @param acceptedTaxon
853
     * @param refMods
854
     * @param events
855
     * @param rawAssociation
856
     * @param k
857
     */
858
    private void handleDerivedUnitFacadeAndBase(Taxon acceptedTaxon, Reference refMods, Node event,
859
            String rawAssociation) {
860
        logger.info("handleDerivedUnitFacadeAndBase");
861
        String descr;
862
        DerivedUnit derivedUnitBase;
863
        MySpecimenOrObservation myspecimenOrObservation;
864
        DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
865
        derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
866

    
867
        sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
868

    
869
        //TODO this may not always be correct, ask user
870
        TaxonNameBase<?,?> typifiableName = acceptedTaxon != null ?  acceptedTaxon.getName() : null;
871
        myspecimenOrObservation = extractSpecimenOrObservation(event,derivedUnitBase,SpecimenOrObservationType.DerivedUnit, typifiableName);
872
        derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
873
        descr=myspecimenOrObservation.getDescr();
874

    
875
        sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
876

    
877
        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
878

    
879
        Feature feature = makeFeature(derivedUnitBase);
880
        featuresMap.put(feature.getTitleCache(),feature);
881
        if(!StringUtils.isEmpty(descr)) {
882
            derivedUnitBase.setTitleCache(descr, true);
883
        }
884

    
885
        IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
886

    
887
        taxonDescription.addElement(indAssociation);
888
        sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
889
        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
890
    }
891

    
892

    
893

    
894
    /**
895
     * @param currentName
896
     * @param materials: the XML node group
897
     * @param acceptedTaxon: the current accepted Taxon
898
     * @param refMods: the current reference extracted from the MODS
899
     */
900
    private String extractMaterialsDirect(Node materials, Taxon acceptedTaxon, Reference refMods, String event, TaxonNameBase<?,?> currentName) {
901
        logger.info("extractMaterialsDirect");
902
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
903
        String descr="";
904

    
905
        DerivedUnit derivedUnitBase=null;
906
        MySpecimenOrObservation myspecimenOrObservation = extractSpecimenOrObservation(materials,derivedUnitBase, SpecimenOrObservationType.DerivedUnit, currentName);
907
        derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
908

    
909
        sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
910

    
911
        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
912

    
913
        Feature feature=null;
914
        if (event.equalsIgnoreCase("collection")){
915
            feature = makeFeature(derivedUnitBase);
916
        }
917
        else{
918
            feature = Feature.MATERIALS_EXAMINED();
919
        }
920
        featuresMap.put(feature.getTitleCache(),  feature);
921

    
922
        descr=myspecimenOrObservation.getDescr();
923
        if(!StringUtils.isEmpty(descr)) {
924
            derivedUnitBase.setTitleCache(descr, true);
925
        }
926

    
927
        IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
928

    
929
        taxonDescription.addElement(indAssociation);
930
        sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
931
        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
932

    
933
        return derivedUnitBase.getTitleCache();
934

    
935
    }
936

    
937

    
938
    /**
939
     * @param description: the XML node group
940
     * @param acceptedTaxon: the current acceptedTaxon
941
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
942
     * @param nametosave: the list of objects to save into the CDM
943
     * @param refMods: the current reference extracted from the MODS
944
     * @param featureName: the feature name
945
     */
946
    @SuppressWarnings({ "rawtypes"})
947
    private String extractSpecificFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
948
            List<TaxonNameBase> nametosave, Reference refMods, String featureName ) {
949
        logger.info("extractSpecificFeature "+featureName);
950
        //        System.out.println("GRUUUUuu");
951
        NodeList children = description.getChildNodes();
952
        NodeList insideNodes ;
953
        NodeList trNodes;
954
        //        String descr ="";
955
        String localdescr="";
956
        List<String> blabla=null;
957
        List<String> text = new ArrayList<String>();
958

    
959
        String table="<table>";
960
        String head="";
961
        String line="";
962

    
963
        Feature currentFeature=getFeatureObjectFromString(featureName);
964

    
965
        //        String fullContent = description.getTextContent();
966
        for (int i=0;i<children.getLength();i++){
967
            //            localdescr="";
968
            if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
969
                text.add(children.item(i).getTextContent().trim());
970
            }
971
            if (featureName.equalsIgnoreCase("table")){
972
                if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
973
                        children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("thead")){
974
                    head = extractTableHead(children.item(i));
975
                    table+=head;
976
                    line = extractTableLine(children.item(i));
977
                    if (!line.equalsIgnoreCase("<tr></tr>")) {
978
                        table+=line;
979
                    }
980
                }
981
                if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
982
                        children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
983
                    line = extractTableLineWithColumn(children.item(i).getChildNodes());
984
                    if(!line.equalsIgnoreCase("<tr></tr>")) {
985
                        table+=line;
986
                    }
987
                }
988
            }
989
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
990
                insideNodes=children.item(i).getChildNodes();
991
                blabla= new ArrayList<String>();
992
                for (int j=0;j<insideNodes.getLength();j++){
993
                    Node insideNode = insideNodes.item(j);
994
                	if (insideNode.getNodeName().equalsIgnoreCase("tax:name")){
995
                        String inlinetext = getInlineTextForName(nametosave, refMods, insideNode);
996
                        if (!inlinetext.isEmpty()) {
997
                            blabla.add(inlinetext);
998
                        }
999
                    }
1000
                    else if (insideNode.getNodeName().equalsIgnoreCase("#text")) {
1001
                        if(!insideNode.getTextContent().trim().isEmpty()){
1002
                            blabla.add(insideNode.getTextContent().trim());
1003
                            //                            localdescr += insideNodes.item(j).getTextContent().trim();
1004
                        }
1005
                    }
1006
                }
1007
                if (!blabla.isEmpty()) {
1008
                    String blaStr = StringUtils.join(blabla," ").trim();
1009
                    if(!stringIsEmpty(blaStr)) {
1010
                        setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1011
                        text.add(blaStr);
1012
                    }
1013
                }
1014

    
1015
            }
1016
            if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1017
                if(!children.item(i).getTextContent().trim().isEmpty()){
1018
                    localdescr = children.item(i).getTextContent().trim();
1019
                    if(!stringIsEmpty(localdescr)) {
1020
                        setParticularDescription(localdescr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1021
                    }
1022
                }
1023
            }
1024
        }
1025

    
1026
        table+="</table>";
1027
        if (!table.equalsIgnoreCase("<table></table>")){
1028
            //            System.out.println("TABLE : "+table);
1029
            text.add(table);
1030
        }
1031

    
1032
        if (text !=null && !text.isEmpty()) {
1033
            return StringUtils.join(text," ");
1034
        } else {
1035
            return "";
1036
        }
1037

    
1038
    }
1039

    
1040
    /**
1041
     * @param children
1042
     * @param i
1043
     * @return
1044
     */
1045
    private String extractTableLine(Node child) {
1046
        //logger.info("extractTableLine");
1047
        String line;
1048
        line="<tr>";
1049
        if (child.getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1050
            line = extractTableLineWithColumn(child.getChildNodes());
1051
        }
1052
        line+="</tr>";
1053
        return line;
1054
    }
1055

    
1056
    /**
1057
     * @param children
1058
     * @param i
1059
     * @return
1060
     */
1061
    private String extractTableHead(Node child) {
1062
        //logger.info("extractTableHead");
1063
        String head;
1064
        String line;
1065
        head="<th>";
1066
        NodeList trNodes = child.getChildNodes();
1067
        for (int k=0;k<trNodes.getLength();k++){
1068
            if (trNodes.item(k).getNodeName().equalsIgnoreCase("tax:div")
1069
                    && trNodes.item(k).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1070
                line = extractTableLineWithColumn(trNodes.item(k).getChildNodes());
1071
                head+=line;
1072
            }
1073
        }
1074
        head+="</th>";
1075
        return head;
1076
    }
1077

    
1078
    /**
1079
     * build a html table line, with td columns
1080
     * @param tdNodes
1081
     * @return an html coded line
1082
     */
1083
    private String extractTableLineWithColumn(NodeList tdNodes) {
1084
        //logger.info("extractTableLineWithColumn");
1085
        String line;
1086
        line="<tr>";
1087
        for (int l=0;l<tdNodes.getLength();l++){
1088
            if (tdNodes.item(l).getNodeName().equalsIgnoreCase("tax:p")){
1089
                line+="<td>"+tdNodes.item(l).getTextContent()+"</td>";
1090
            }
1091
        }
1092
        line+="</tr>";
1093
        return line;
1094
    }
1095

    
1096
    /**
1097
     * @param description: the XML node group
1098
     * @param acceptedTaxon: the current acceptedTaxon
1099
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1100
     * @param nametosave: the list of objects to save into the CDM
1101
     * @param refMods: the current reference extracted from the MODS
1102
     * @param featureName: the feature name
1103
     */
1104
    @SuppressWarnings({ "unused", "rawtypes" })
1105
    private String extractSpecificFeatureNotStructured(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
1106
            List<TaxonNameBase> nameToSave, Reference refMods, String featureName ) {
1107
        logger.info("extractSpecificFeatureNotStructured " + featureName);
1108
        NodeList children = description.getChildNodes();
1109
        NodeList insideNodes ;
1110
        List<String> blabla= new ArrayList<String>();
1111

    
1112

    
1113
        Feature currentFeature = getFeatureObjectFromString(featureName);
1114

    
1115
        String fullContent = description.getTextContent();
1116
        for (int i=0;i<children.getLength();i++){
1117
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1118
                insideNodes=children.item(i).getChildNodes();
1119
                for (int j=0;j<insideNodes.getLength();j++){
1120
                    if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1121
                        String inlineText =getInlineTextForName(nameToSave, refMods, insideNodes.item(j));
1122
                        if(!inlineText.isEmpty()) {
1123
                            blabla.add(inlineText);
1124
                        }
1125
                    }
1126
                    if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1127
                        if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1128
                            blabla.add(insideNodes.item(j).getTextContent().trim());
1129
                        }
1130
                    }
1131
                }
1132
            }
1133
            if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1134
                if(!children.item(i).getTextContent().trim().isEmpty()){
1135
                    String localdescr = children.item(i).getTextContent().trim();
1136
                    if(!localdescr.isEmpty())
1137
                    {
1138
                        blabla.add(localdescr);
1139
                    }
1140
                }
1141
            }
1142
        }
1143

    
1144
        if (blabla !=null && !blabla.isEmpty()) {
1145
            String blaStr = StringUtils.join(blabla," ").trim();
1146
            if (! stringIsEmpty(blaStr)) {
1147
                setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1148
                return blaStr;
1149
            } else {
1150
                return "";
1151
            }
1152
        } else {
1153
            return "";
1154
        }
1155

    
1156
    }
1157

    
1158
    /**
1159
     * @param blaStr
1160
     * @return
1161
     */
1162
    private boolean stringIsEmpty(String blaStr) {
1163
        if (blaStr.matches("(\\.|,|;|\\.-)?")){
1164
        	return true;
1165
        }else{
1166
        	return false;
1167
        }
1168
    }
1169

    
1170
    /**
1171
     * @param nametosave
1172
     * @param refMods
1173
     * @param insideNodes
1174
     * @param blabla
1175
     * @param j
1176
     */
1177
    @SuppressWarnings({ "rawtypes" })
1178
    private String getInlineTextForName(List<TaxonNameBase> nametosave, Reference refMods, Node insideNode) {
1179
        if (true){
1180
        	NodeList children = insideNode.getChildNodes();
1181
        	String result = "";
1182
            for (int i=0;i<children.getLength();i++){
1183
            	Node nameChild = children.item(i);
1184
                if(nameChild.getNodeName().equalsIgnoreCase("#text")){
1185
                	result += nameChild.getTextContent();
1186
                }else{
1187
                	//do nothing
1188
                }
1189
            }
1190
        	return result.replace("\n", "").trim();
1191
        }else{
1192
	    	TaxonNameBase tnb = getTaxonNameBaseFromXML(insideNode, nametosave,refMods,false);
1193
	        //                        Taxon tax = getTaxonFromTxonNameBase(tnb, refMods);
1194
	        Taxon tax = currentMyName.getTaxon();
1195
	        if(tnb !=null && tax != null){
1196
	            String linkedTaxon = tnb.getTitleCache().split("sec")[0];//TODO NOT IMPLEMENTED IN THE CDM YET
1197
	            return "<cdm:taxon uuid='"+tax.getUuid()+"'>"+linkedTaxon+"</cdm:taxon>";
1198
	        }else if (tnb != null && tax == null){
1199
	        	//TODO
1200
	        	return "<cdm:taxonName uuid='" + tnb.getUuid() +"'>" + tnb.getTitleCache().split("sec")[0]  +"</cdm:taxonName>";
1201
	        }else{
1202
	        	logger.warn("Inline text has no content yet");
1203
	        }
1204
	        return "";
1205
        }
1206
    }
1207

    
1208
    /**
1209
     * @param featureName
1210
     * @return
1211
     */
1212
    @SuppressWarnings("rawtypes")
1213
    private Feature getFeatureObjectFromString(String featureName) {
1214
        logger.info("getFeatureObjectFromString");
1215
        List<Feature> features = importer.getTermService().list(Feature.class, null,null,null,null);
1216
        Feature currentFeature=null;
1217
        for (Feature feature: features){
1218
            String tmpF = feature.getTitleCache();
1219
            if (tmpF.equalsIgnoreCase(featureName)) {
1220
                currentFeature=feature;
1221
                //                System.out.println("currentFeatureFromList "+currentFeature.getUuid());
1222
            }
1223
        }
1224
        if (currentFeature == null) {
1225
            currentFeature=Feature.NewInstance(featureName, featureName, featureName);
1226
            if(featureName.equalsIgnoreCase("Other")){
1227
                currentFeature.setUuid(OtherUUID);
1228
            }
1229
            if(featureName.equalsIgnoreCase(notMarkedUp)){
1230
                currentFeature.setUuid(NotMarkedUpUUID);
1231
            }
1232
            importer.getTermService().saveOrUpdate(currentFeature);
1233
        }
1234
        return currentFeature;
1235
    }
1236

    
1237

    
1238

    
1239

    
1240
    /**
1241
     * @param children: the XML node group
1242
     * @param nametosave: the list of objects to save into the CDM
1243
     * @param acceptedTaxon: the current acceptedTaxon
1244
     * @param refMods: the current reference extracted from the MODS
1245
     * @param fullContent :the parsed XML content
1246
     * @return a list of description (text)
1247
     */
1248
    @SuppressWarnings({ "unused", "rawtypes" })
1249
    private List<String> parseParagraph(List<TaxonNameBase> namesToSave, Taxon acceptedTaxon, Reference refMods, Node paragraph, Feature feature){
1250
        logger.info("parseParagraph "+feature.toString());
1251
        List<String> fullDescription=  new ArrayList<String>();
1252
        //        String localdescr;
1253
        String descr="";
1254
        NodeList insideNodes ;
1255
        boolean collectionEvent = false;
1256
        List<Node>collectionEvents = new ArrayList<Node>();
1257

    
1258
        NodeList children = paragraph.getChildNodes();
1259

    
1260
        for (int i=0;i<children.getLength();i++){
1261
            //            localdescr="";
1262
            if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
1263
                descr += children.item(i).getTextContent().trim();
1264
            }
1265
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1266
                insideNodes=children.item(i).getChildNodes();
1267
                List<String> blabla= new ArrayList<String>();
1268
                for (int j=0;j<insideNodes.getLength();j++){
1269
                    boolean nodeKnown = false;
1270
                    //    System.out.println("insideNodes.item(j).getNodeName() : "+insideNodes.item(j).getNodeName());
1271
                    if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1272
                        String inlineText = getInlineTextForName(namesToSave, refMods, insideNodes.item(j));
1273
                        if (!inlineText.isEmpty()) {
1274
                            blabla.add(inlineText);
1275
                        }
1276
                        nodeKnown=true;
1277
                    }
1278
                    else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1279
                        if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1280
                            blabla.add(insideNodes.item(j).getTextContent().trim());
1281
                            // localdescr += insideNodes.item(j).getTextContent().trim();
1282
                        }
1283
                        nodeKnown=true;
1284
                    }
1285
                    else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:bibref")) {
1286
                        String ref = insideNodes.item(j).getTextContent().trim();
1287
                        if (ref.endsWith(";")  && ((ref.length())>1)) {
1288
                            ref=ref.substring(0, ref.length()-1)+".";
1289
                        }
1290
                        Reference reference = ReferenceFactory.newGeneric();
1291
                        reference.setTitleCache(ref, true);
1292
                        blabla.add(reference.getTitleCache());
1293
                        nodeKnown=true;
1294
                    }
1295
                    else if  (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:figure")){
1296
                        String figure = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "figure");
1297
                        blabla.add(figure);
1298
                    }
1299
                    else if(insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:div") &&
1300
                            insideNodes.item(j).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1301
                            insideNodes.item(j).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1302
                        String table = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1303
                        blabla.add(table);
1304
                    }
1305
                    else if  (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1306
                        //                        logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1307
                        String titlecache  = extractMaterialsDirect(insideNodes.item(j), acceptedTaxon, refMods, "collection", null);
1308
                        blabla.add(titlecache);
1309
                        collectionEvent=true;
1310
                        collectionEvents.add(insideNodes.item(j));
1311
                        nodeKnown=true;
1312
                    }else{
1313
                    	logger.warn("node not handled yet: " + insideNodes.item(j).getNodeName());
1314
                    }
1315

    
1316
                }
1317
                if (!StringUtils.isBlank(StringUtils.join(blabla," "))) {
1318
                    fullDescription.add(StringUtils.join(blabla," "));
1319
                }
1320
            }
1321
            if  (children.item(i).getNodeName().equalsIgnoreCase("tax:figure")){
1322
                String figure = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "Figure");
1323
                fullDescription.add(figure);
1324
            }
1325
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1326
                    children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1327
                    children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1328
                String table = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1329
                fullDescription.add(table);
1330
            }
1331
        }
1332

    
1333
        if( !stringIsEmpty(descr.trim())){
1334
            Feature currentFeature= getNotMarkedUpFeatureObject();
1335
            setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1336
        }
1337
        //        if (collectionEvent) {
1338
        //            logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1339
        //            for (Node coll:collectionEvents){
1340
        //                = extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
1341
        //            }
1342
        //        }
1343
        return fullDescription;
1344
    }
1345

    
1346

    
1347
    /**
1348
     * @param description: the XML node group
1349
     * @param acceptedTaxon: the current acceptedTaxon
1350
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1351
     * @param nametosave: the list of objects to save into the CDM
1352
     * @param refMods: the current reference extracted from the MODS
1353
     * @param feature: the feature to link the data with
1354
     */
1355
    @SuppressWarnings("rawtypes")
1356
    private void extractFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> namesToSave, Reference refMods, Feature feature){
1357
        logger.info("EXTRACT FEATURE "+feature.toString());
1358
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1359
        List<String> fullDescription= parseParagraph( namesToSave, acceptedTaxon, refMods, description,feature);
1360

    
1361
        //        System.out.println("Feature : "+feature.toString()+", "+fullDescription.toString());
1362
        if (!fullDescription.isEmpty() &&!stringIsEmpty(StringUtils.join(fullDescription,"\n").trim())) {
1363
            setParticularDescription(StringUtils.join(fullDescription,"\n").trim(),acceptedTaxon,defaultTaxon, refMods,feature);
1364
        }
1365

    
1366
    }
1367

    
1368

    
1369
    /**
1370
     * @param descr: the XML Nodegroup to parse
1371
     * @param acceptedTaxon: the current acceptedTaxon
1372
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1373
     * @param refMods: the current reference extracted from the MODS
1374
     * @param currentFeature: the feature name
1375
     * @return
1376
     */
1377
    private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon, Reference refMods, Feature currentFeature) {
1378
        logger.info("setParticularDescription " + currentFeature.getTitleCache()+", \n blabla : "+descr);
1379

    
1380
        //remove redundant feature title
1381
        String featureStr = currentFeature.getTitleCache();
1382
        if (!descr.isEmpty() && descr.toLowerCase().startsWith(featureStr.toLowerCase())){
1383
        	descr = descr.replaceAll("(?i)" + featureStr + "\\.\\s*", "");
1384
        }
1385

    
1386

    
1387
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1388
        featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1389

    
1390
        TextData textData = createTextData(descr, refMods, currentFeature);
1391

    
1392
        if(acceptedTaxon!=null){
1393
            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1394
            td.addElement(textData);
1395
            acceptedTaxon.addDescription(td);
1396

    
1397
            sourceHandler.addAndSaveSource(refMods, td, null);
1398
            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1399
        }
1400

    
1401
        if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1402
            try{
1403
                Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1404
                if (tmp!=null) {
1405
                    defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1406
                }else{
1407
                    importer.getTaxonService().saveOrUpdate(defaultTaxon);
1408
                }
1409
            }catch(Exception e){
1410
                logger.debug("TAXON EXISTS"+defaultTaxon);
1411
            }
1412

    
1413
            TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1414
            defaultTaxon.addDescription(td);
1415
            td.addElement(textData);
1416
            sourceHandler.addAndSaveSource(refMods, td, null);
1417
            importer.getTaxonService().saveOrUpdate(defaultTaxon);
1418
        }
1419
    }
1420

    
1421
    /**
1422
     * @param descr
1423
     * @param refMods
1424
     * @param currentFeature
1425
     * @return
1426
     */
1427
    private TextData createTextData(String descr, Reference refMods, Feature currentFeature) {
1428
        //logger.info("createTextData");
1429
        TextData textData = TextData.NewInstance();
1430
        textData.setFeature(currentFeature);
1431
        sourceHandler.addSource(refMods, textData);
1432

    
1433
        textData.putText(Language.UNKNOWN_LANGUAGE(), descr);
1434
        return textData;
1435
    }
1436

    
1437

    
1438

    
1439
    /**
1440
     * @param descr: the XML Nodegroup to parse
1441
     * @param acceptedTaxon: the current acceptedTaxon
1442
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1443
     * @param refMods: the current reference extracted from the MODS
1444
     * @param currentFeature: the feature name
1445
     * @return
1446
     */
1447
    private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon,Reference currentRef, Reference refMods, Feature currentFeature) {
1448
        //        System.out.println("setParticularDescriptionSPecial "+currentFeature);
1449
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
1450
        logger.info("setParticularDescription");
1451
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1452

    
1453
        featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1454
        TextData textData = createTextData(descr, refMods, currentFeature);
1455

    
1456
        if(! descr.isEmpty() && (acceptedTaxon!=null)){
1457
            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1458
            td.addElement(textData);
1459
            acceptedTaxon.addDescription(td);
1460

    
1461
            sourceHandler.addAndSaveSource(refMods, td, currentRef);
1462
            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1463
        }
1464

    
1465
        if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1466
            try{
1467
                Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1468
                if (tmp!=null) {
1469
                    defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1470
                }else{
1471
                    importer.getTaxonService().saveOrUpdate(defaultTaxon);
1472
                }
1473
            }catch(Exception e){
1474
                logger.debug("TAXON EXISTS"+defaultTaxon);
1475
            }
1476

    
1477
            TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1478
            defaultTaxon.addDescription(td);
1479
            td.addElement(textData);
1480
            sourceHandler.addAndSaveSource(currentRef, td,currentRef);
1481
            importer.getTaxonService().saveOrUpdate(defaultTaxon);
1482
        }
1483
    }
1484

    
1485

    
1486

    
1487
    /**
1488
     * @param synonyms: the XML Nodegroup to parse
1489
     * @param nametosave: the list of objects to save into the CDM
1490
     * @param acceptedTaxon: the current acceptedTaxon
1491
     * @param refMods: the current reference extracted from the MODS
1492
     */
1493
    @SuppressWarnings({ "rawtypes" })
1494
    private void extractSynonyms(Node synonymsNode, Taxon acceptedTaxon,Reference refMods, String followingText) {
1495
        logger.info("extractSynonyms");
1496
        //System.out.println("extractSynonyms for: "+acceptedTaxon);
1497
        Taxon ttmp = (Taxon) importer.getTaxonService().find(acceptedTaxon.getUuid());
1498
        if (ttmp != null) {
1499
            acceptedTaxon = CdmBase.deproxy(ttmp,Taxon.class);
1500
        }
1501
        else{
1502
            acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1503
        }
1504
        NodeList children = synonymsNode.getChildNodes();
1505
        List<MyName> names = new ArrayList<MyName>();
1506

    
1507
        if(synonymsNode.getNodeName().equalsIgnoreCase("tax:name")){
1508
            try {
1509
            	MyName myName = extractScientificNameSynonym(synonymsNode, refMods, followingText);
1510
                names.add(myName);
1511
            } catch (TransformerFactoryConfigurationError e) {
1512
                logger.warn(e);
1513
            } catch (TransformerException e) {
1514
                logger.warn(e);
1515
            }
1516
        }
1517

    
1518

    
1519
        for (int i=0;i<children.getLength();i++){
1520
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1521
                NodeList tmp = children.item(i).getChildNodes();
1522
                //                String fullContent = children.item(i).getTextContent();
1523
                for (int j=0; j< tmp.getLength();j++){
1524
                    if(tmp.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1525
                        try {
1526
                        	MyName myName = extractScientificNameSynonym(tmp.item(j),refMods, followingText);
1527
                            names.add(myName);
1528
                        } catch (TransformerFactoryConfigurationError e) {
1529
                            logger.warn(e);
1530
                        } catch (TransformerException e) {
1531
                            logger.warn(e);
1532
                        }
1533
                    }
1534
                }
1535
            }
1536
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
1537
                try {
1538
                	MyName myName = extractScientificNameSynonym(children.item(i),refMods, followingText);
1539
                    names.add(myName);
1540
                } catch (TransformerFactoryConfigurationError e) {
1541
                    logger.warn(e);
1542
                } catch (TransformerException e) {
1543
                    logger.warn(e);
1544
                }
1545

    
1546
            }
1547
        }
1548

    
1549
        for(MyName name:names){
1550
        	TaxonNameBase nameToBeFilled = name.getTaxonNameBase();
1551
            Synonym synonym = name.getSyno();
1552
            addFollowingTextToName(nameToBeFilled, followingText);
1553

    
1554
            /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1555
            nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank());
1556
            if (nameToBeFilled.hasProblem() &&
1557
                    !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1558
                //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1559
                addProblemNameToFile(name.getName(),"",nomenclaturalCode,name.getRank());
1560
                nameToBeFilled = solveNameProblem(name.getOriginalName(), name.getName(), parser,name.getAuthor(), name.getRank());
1561
            }
1562
            nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
1563
             */
1564
            if (!name.getIdentifier().isEmpty() && (name.getIdentifier().length()>2)){
1565
                setLSID(name.getIdentifier(), synonym);
1566
            }
1567

    
1568
            Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1569
            boolean synoExist = false;
1570
            for (Synonym syn: synonymsSet){
1571

    
1572
                boolean a =syn.getName().equals(synonym.getName());
1573
                boolean b = syn.getSec().equals(synonym.getSec());
1574
                if (a && b) {
1575
                    synoExist=true;
1576
                }
1577
            }
1578
            if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1579
                sourceHandler.addSource(refMods, synonym);
1580
                acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1581
            }
1582
        }
1583
        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1584
    }
1585

    
1586

    
1587
    private boolean addFollowingTextToName(ITaxonNameBase nameToBeFilled, String followingText) {
1588
    	if (nameToBeFilled != null && StringUtils.isNotBlank(followingText)){
1589
    		if (! followingText.matches("\\d\\.?")){
1590

    
1591
	    		if (followingText.startsWith(",")){
1592
	    			followingText = followingText.substring(1).trim();
1593
	    		}
1594
	    		nameToBeFilled.setFullTitleCache(nameToBeFilled.getFullTitleCache()+ "," +followingText , true);
1595
    		}
1596
    		return true;
1597
    	}
1598
    	return false;
1599

    
1600
	}
1601

    
1602
	/**
1603
     * @param refgroup: the XML nodes
1604
     * @param nametosave: the list of objects to save into the CDM
1605
     * @param acceptedTaxon: the current acceptedTaxon
1606
     * @param nametosave: the list of objects to save into the CDM
1607
     * @param refMods: the current reference extracted from the MODS
1608
     * @return the acceptedTaxon (why?)
1609
     * handle cases where the bibref are inside <p> and outside
1610
     */
1611
    @SuppressWarnings({ "rawtypes" })
1612
    private Taxon extractReferences(Node refgroup, List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference refMods) {
1613
        logger.info("extractReferences");
1614
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1615

    
1616
        NodeList children = refgroup.getChildNodes();
1617
        INonViralName nameToBeFilled = getNonViralNameAccNomenclature();
1618

    
1619
        ReferenceBuilder refBuild = new ReferenceBuilder(sourceHandler);
1620
        for (int i=0;i<children.getLength();i++){
1621
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
1622
                String ref = children.item(i).getTextContent().trim();
1623
                refBuild.builReference(ref, treatmentMainName, nomenclaturalCode,  acceptedTaxon, refMods);
1624
                if (!refBuild.isFoundBibref()){
1625
                    extractReferenceRawText(children.item(i).getChildNodes(), nameToBeFilled, refMods, acceptedTaxon);
1626
                }
1627
            }
1628

    
1629
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1630
                NodeList references = children.item(i).getChildNodes();
1631
                String descr="";
1632
                for (int j=0;j<references.getLength();j++){
1633
                    if(references.item(j).getNodeName().equalsIgnoreCase("tax:bibref")){
1634
                        String ref = references.item(j).getTextContent().trim();
1635
                        refBuild.builReference(ref, treatmentMainName,  nomenclaturalCode,  acceptedTaxon, refMods);
1636
                    }
1637
                    else
1638
                        if (references.item(j).getNodeName().equalsIgnoreCase("#text")
1639
                                && !references.item(j).getTextContent().trim().isEmpty()){
1640
                            descr += references.item(j).getTextContent().trim();
1641
                        }
1642

    
1643
                }
1644
                if (!refBuild.isFoundBibref()){
1645
                    //if it's not tagged, put it as row information.
1646
                    //                    extractReferenceRawText(references, nameToBeFilled, nametosave, refMods, acceptedTaxon);
1647
                    //then put it as a not markup feature if not empty
1648
                    if (!stringIsEmpty(descr.trim())){
1649
                        Feature currentFeature= getNotMarkedUpFeatureObject();
1650
                        setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1651
                    }
1652
                }
1653
            }
1654
        }
1655
        //        importer.getClassificationService().saveOrUpdate(classification);
1656
        return acceptedTaxon;
1657

    
1658
    }
1659

    
1660
    /**
1661
     * get the non viral name according to the current nomenclature
1662
     * @return
1663
     */
1664

    
1665
    private INonViralName getNonViralNameAccNomenclature() {
1666
    	return nomenclaturalCode.getNewTaxonNameInstance(null);
1667
    }
1668

    
1669
    /**
1670
     * @return the feature object for the category "not marked up"
1671
     */
1672
    private Feature getNotMarkedUpFeatureObject() {
1673
    	// FIXME use getFeature(uuid ....)
1674
        logger.info("getNotMarkedUpFeatureObject");
1675
        Feature currentFeature = (Feature)importer.getTermService().find(NotMarkedUpUUID);
1676
        if (currentFeature == null) {
1677
            currentFeature=Feature.NewInstance(notMarkedUp, notMarkedUp, notMarkedUp);
1678
            currentFeature.setUuid(NotMarkedUpUUID);
1679
            //TODO use userDefined Feature Vocabulary
1680
            Feature.DISTRIBUTION().getVocabulary().addTerm(currentFeature);
1681
//            importer.getTermService().saveOrUpdate(currentFeature);
1682
            importer.getVocabularyService().saveOrUpdate(currentFeature.getVocabulary());
1683
        }
1684
        return currentFeature;
1685
    }
1686

    
1687
    /**
1688
     * @param references
1689
     * handle cases where the bibref are inside <p> and outside
1690
     */
1691
    @SuppressWarnings("rawtypes")
1692
    private void extractReferenceRawText(NodeList references, INonViralName nameToBeFilled, Reference refMods,
1693
            Taxon acceptedTaxon) {
1694
        logger.info("extractReferenceRawText");
1695
        String refString="";
1696
        currentMyName= new MyName(true);
1697
        for (int j=0;j<references.getLength();j++){
1698
            acceptedTaxon=CdmBase.deproxy(acceptedTaxon, Taxon.class);
1699
            //no bibref tag inside
1700
            //            System.out.println("references.item(j).getNodeName()"+references.item(j).getNodeName());
1701
            if (references.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1702

    
1703
                try {
1704
                	String followingText = null;  //needs to be checked if follText is possible
1705
                	//TODO create or not create?
1706
                    currentMyName = extractScientificName(references.item(j), refMods, followingText);
1707
                } catch (TransformerFactoryConfigurationError e) {
1708
                    logger.warn(e);
1709
                } catch (TransformerException e) {
1710
                    logger.warn(e);
1711
                }
1712

    
1713
                //                name=name.trim();
1714
            }
1715
            if (references.item(j).getNodeName().equalsIgnoreCase("#text")){
1716
                refString = references.item(j).getTextContent().trim();
1717
            }
1718
            if(references.item(j).getNodeName().equalsIgnoreCase("#text") && !references.item(j).getTextContent().trim().isEmpty()){
1719
                //
1720
               if (!currentMyName.getStatus().isEmpty()){
1721
            	   String nomNovStatus = this.newNameStatus(currentMyName.getStatus());
1722
	               	if (nomNovStatus != null){
1723
	               		nameToBeFilled.setAppendedPhrase(nomNovStatus);
1724
	               	}else{
1725
	            	   try {
1726
	                        NomenclaturalStatusType  statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1727
                            nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1728
	                    } catch (UnknownCdmTypeException e) {
1729
	                        addProblematicStatusToFile(currentMyName.getStatus());
1730
	                        logger.warn("Problem with status");
1731
	                    }
1732
	               	}
1733
                }
1734

    
1735
                String fullLineRefName = references.item(j).getTextContent().trim();
1736
                int nameOrRefOrOther=2;
1737
                nameOrRefOrOther=askIfNameContained(fullLineRefName);
1738
                if (nameOrRefOrOther==0){
1739
                    TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1740
                    Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1741

    
1742
                    Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1743
                    //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1744
                    boolean synoExist = false;
1745
                    for (Synonym syn: synonymsSet){
1746
                        //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1747
                        boolean a =syn.getName().equals(synonym.getName());
1748
                        boolean b = syn.getSec().equals(synonym.getSec());
1749
                        if (a && b) {
1750
                            synoExist=true;
1751
                        }
1752
                    }
1753
                    if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1754
                        sourceHandler.addSource(refMods, synonym);
1755

    
1756
                        acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1757
                    }
1758
                }
1759

    
1760
                if (nameOrRefOrOther==1){
1761
                    Reference re = ReferenceFactory.newGeneric();
1762
                    re.setTitleCache(fullLineRefName, true);
1763

    
1764
                    /* TaxonNameBase nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
1765
                    if (nameTBF.hasProblem() &&
1766
                            !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1767
                        addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
1768
                        nameTBF=solveNameProblem(currentMyName.getName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
1769
                    }
1770
                    nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1771
                     */
1772
                    TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1773
                    Synonym synonym = Synonym.NewInstance(nameTBF, re);
1774

    
1775
                    Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1776
                    //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1777
                    boolean synoExist = false;
1778
                    for (Synonym syn: synonymsSet){
1779
                        //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1780
                        boolean a =syn.getName().equals(synonym.getName());
1781
                        boolean b = syn.getSec().equals(synonym.getSec());
1782
                        if (a && b) {
1783
                            synoExist=true;
1784
                        }
1785
                    }
1786
                    if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1787
                        sourceHandler.addSource(refMods, synonym);
1788

    
1789
                        acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1790
                    }
1791

    
1792
                }
1793

    
1794

    
1795
                if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1796
                    setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1797
                }
1798
            }
1799

    
1800
            if(!currentMyName.getName().isEmpty()){
1801
                //logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+currentMyName.getName()+"*");
1802
                if (acceptedTaxon.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(currentMyName.getName().trim())){
1803
                    Reference refS = ReferenceFactory.newGeneric();
1804
                    refS.setTitleCache(refString, true);
1805
                    //                            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1806
                    //                            acceptedTaxon.addDescription(td);
1807
                    //                            acceptedTaxon.addSource(refSource);
1808
                    //
1809
                    //                            TextData textData = TextData.NewInstance(Feature.CITATION());
1810
                    //
1811
                    //                            textData.addSource(null, null, refS, null);
1812
                    //                            td.addElement(textData);
1813
                    //                            td.addSource(refSource);
1814
                    //                            importer.getDescriptionService().saveOrUpdate(td);
1815

    
1816

    
1817
                    if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1818
                        setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1819

    
1820
                    }
1821

    
1822
                    acceptedTaxon.getName().setNomenclaturalReference(refS);
1823
                }else{
1824
                    TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1825
                    Synonym synonym = null;
1826
                    if (! currentMyName.getStatus().isEmpty()){
1827
                    	String nomNovStatus = this.newNameStatus(currentMyName.getStatus());
1828
                    	if (nomNovStatus != null){
1829
                    		nameToBeFilled.setAppendedPhrase(nomNovStatus);
1830
                    	}else{
1831
	                    	try {
1832
	                            NomenclaturalStatusType statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1833
	                            nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1834
	                            synonym = Synonym.NewInstance(nameTBF, refMods);
1835
	                        } catch (UnknownCdmTypeException e) {
1836
	                            addProblematicStatusToFile(currentMyName.getStatus());
1837
	                            logger.warn("Problem with status");
1838
	                            synonym = Synonym.NewInstance(nameTBF, refMods);
1839
	                            synonym.setAppendedPhrase(currentMyName.getStatus());
1840
	                        }
1841
                    	}
1842
                    }else{
1843
                        synonym =  Synonym.NewInstance(nameTBF, refMods);
1844
                    }
1845

    
1846

    
1847
                    if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1848
                        setLSID(currentMyName.getIdentifier(), synonym);
1849
                    }
1850

    
1851
                    Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1852
                    //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1853
                    boolean synoExist = false;
1854
                    for (Synonym syn: synonymsSet){
1855
                        //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1856
                        boolean a =syn.getName().equals(synonym.getName());
1857
                        boolean b = syn.getSec().equals(synonym.getSec());
1858
                        if (a && b) {
1859
                            synoExist=true;
1860
                        }
1861
                    }
1862
                    if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1863
                        sourceHandler.addSource(refMods, synonym);
1864

    
1865
                        acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1866
                    }
1867
                }
1868
            }
1869
            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1870
        }
1871
    }
1872

    
1873

    
1874

    
1875
    /**
1876
     * @param identifier
1877
     * @param acceptedTaxon
1878
     */
1879
    @SuppressWarnings("rawtypes")
1880
    private void setLSID(String identifier, TaxonBase<?> taxon) {
1881
        //logger.info("setLSID");
1882
        //        boolean lsidok=false;
1883
        String id = identifier.split("__")[0];
1884
        String source = identifier.split("__")[1];
1885
        if (id.indexOf("lsid")>-1){
1886
            try {
1887
                LSID lsid = new LSID(id);
1888
                taxon.setLsid(lsid);
1889
                //                lsidok=true;
1890
            } catch (MalformedLSIDException e) {
1891
                logger.warn("Malformed LSID");
1892
            }
1893

    
1894
        }
1895

    
1896
        //logger.info("search reference for LSID");
1897
        //  if ((id.indexOf("lsid")<0) || !lsidok){
1898
        //ADD ORIGINAL SOURCE ID EVEN IF LSID
1899
        Reference re = null;
1900
        Pager<Reference> references = importer.getReferenceService().findByTitle(Reference.class, source, MatchMode.EXACT, null, 1, null, null, null);
1901
        if( references !=null && references.getCount()>0){
1902
            re=references.getRecords().get(0);
1903
        }
1904
        //logger.info("search reference for LSID-end");
1905
        if(re == null){
1906
            re = ReferenceFactory.newGeneric();
1907
            re.setTitleCache(source, true);
1908
            importer.getReferenceService().saveOrUpdate(re);
1909
        }
1910
        re=CdmBase.deproxy(re, Reference.class);
1911

    
1912
        //logger.info("search source for LSID");
1913
        Set<IdentifiableSource> sources = taxon.getSources();
1914
        boolean lsidinsource=false;
1915
        boolean urlinsource=false;
1916
        for (IdentifiableSource src:sources){
1917
            if (id.equalsIgnoreCase(src.getIdInSource()) && re.getTitleCache().equals(src.getCitation().getTitleCache())) {
1918
                lsidinsource=true;
1919
            }
1920
            if (src.getIdInSource() == null && re.getTitleCache().equals(sourceUrlRef.getTitleCache())) {
1921
                urlinsource=true;
1922
            }
1923
        }
1924
        if(!lsidinsource) {
1925
            taxon.addSource(OriginalSourceType.Import, id,null,re,null);
1926
        }
1927
        if(!urlinsource)
1928
        {
1929
            sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
1930
            taxon.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
1931
            // }
1932
        }
1933

    
1934
    }
1935

    
1936
    /**
1937
     * try to solve a parsing problem for a scientific name
1938
     * @param original : the name from the OCR document
1939
     * @param name : the tagged version
1940
     * @param parser
1941
     * @return the corrected TaxonNameBase
1942
     */
1943
    /*   @SuppressWarnings({ "unchecked", "rawtypes" })
1944
    private TaxonNameBase<?,?> solveNameProblem(String original, String name, INonViralNameParser parser, String author, Rank rank) {
1945
        Map<String,String> ato = namesMap.get(original);
1946
        if (ato == null) {
1947
            ato = namesMap.get(original+" "+author);
1948
        }
1949

    
1950

    
1951
        if (ato == null && rank.equals(Rank.UNKNOWN_RANK())){
1952
            rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
1953
        }
1954
        if (ato != null && rank.equals(Rank.UNKNOWN_RANK())){
1955
            rank = getRank(ato);
1956
        }
1957
        //        TaxonNameBase<?,?> nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1958
        TaxonNameBase<?,?> nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1959
        //                logger.info("RANK: "+rank);
1960
        int retry=0;
1961
        List<ParserProblem> problems = nameTBF.getParsingProblems();
1962
        for (ParserProblem pb:problems) {
1963
            System.out.println(pb.toString());
1964
        }
1965
        while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
1966
            addProblemNameToFile(name,author,nomenclaturalCode,rank);
1967
            String fullname=name;
1968
            if(! skippQuestion) {
1969
                fullname =  getFullReference(name,nameTBF.getParsingProblems());
1970
            }
1971
            if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1972
                nameTBF = TaxonNameFactory.NewBotanicalInstance(null);
1973
            }
1974
            if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1975
                nameTBF = TaxonNameFactory.NewZoologicalInstance(null);
1976
            }
1977
            if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1978
                nameTBF= TaxonNameFactory.NewBacterialInstance(null);
1979
            }
1980
            parser.parseReferencedName(nameTBF, fullname, rank, false);
1981
            retry++;
1982
        }
1983
        if (retry == 1){
1984
            if(author != null){
1985
                if (name.indexOf(author)>-1) {
1986
                    nameTBF = parser.parseSimpleName(name.substring(0, name.indexOf(author)), nomenclaturalCode, rank);
1987
                } else {
1988
                    nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1989
                }
1990
                if (nameTBF.hasProblem()){
1991
                    if (name.indexOf(author)>-1) {
1992
                        addProblemNameToFile(name.substring(0, name.indexOf(author)),author,nomenclaturalCode,rank);
1993
                    } else {
1994
                        addProblemNameToFile(name,author,nomenclaturalCode,rank);
1995
                    }
1996
                    //                    System.out.println("TBF still has problems "+nameTBF.hasProblem());
1997
                    problems = nameTBF.getParsingProblems();
1998
                    for (ParserProblem pb:problems) {
1999
                        System.out.println(pb.toString());
2000
                    }
2001
                    nameTBF.setFullTitleCache(name, true);
2002
                }else{
2003
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2004
                        ((BotanicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2005
                    }
2006
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2007
                        ((ZoologicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2008
                    }
2009
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2010
                        ((BacterialName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2011
                    }
2012
                }
2013
                //                    logger.info("FULL TITLE CACHE "+name);
2014
            }else{
2015
                nameTBF.setFullTitleCache(name, true);
2016
            }
2017
        }
2018
        return nameTBF;
2019
    }
2020

    
2021
     */
2022

    
2023
    /**
2024
     * @param nomenclatureNode: the XML nodes
2025
     * @param nametosave: the list of objects to save into the CDM
2026
     * @param refMods: the current reference extracted from the MODS
2027
     * @return
2028
     */
2029
    @SuppressWarnings({ "rawtypes" })
2030
    private Taxon extractNomenclature(Node nomenclatureNode,  List<TaxonNameBase> nametosave, Reference refMods) throws ClassCastException{
2031
        refMods=CdmBase.deproxy(refMods, Reference.class);
2032

    
2033
        logger.info("extractNomenclature");
2034
        NodeList children = nomenclatureNode.getChildNodes();
2035
        String freetext="";
2036
        Taxon acceptedTaxon = null;
2037
        //   INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2038

    
2039
        //        String fullContent = nomenclatureNode.getTextContent();
2040

    
2041
        NomenclaturalStatusType statusType = null;
2042
        String newNameStatus = null;
2043
        //TODO
2044
        for (int i=0;i<children.getLength();i++){
2045
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:status")){
2046
                String status = children.item(i).getTextContent().trim();
2047

    
2048
                if (!status.isEmpty()){
2049
                	if (newNameStatus(status) != null){
2050
                		newNameStatus = newNameStatus(status);
2051
                    }else{
2052
	                    try {
2053
	                        statusType = nomStatusString2NomStatus(status);
2054
	                    } catch (UnknownCdmTypeException e) {
2055
	//                    	nomNovStatus;
2056
	                    	addProblematicStatusToFile(status);
2057
	                        logger.warn("Problem with status: " + status);
2058
	                    }
2059
                    }
2060
                }
2061
            }
2062
        }
2063

    
2064
        boolean containsSynonyms=false;
2065
        boolean wasSynonym = false;
2066
        usedFollowingTextPrefix = null;  //reset
2067

    
2068
        for (int i=0; i<children.getLength(); i++){
2069
        	Node childNode = children.item(i);
2070
        	String childName = childNode.getNodeName();
2071

    
2072

    
2073
        	//following text
2074
        	followingText = null;
2075
        	if ( i + 1 < children.getLength()){
2076
            	Node followingTextNode = children.item(i +1);
2077
            	if (followingTextNode.getNodeName().equals("#text") && !followingTextNode.getTextContent().matches("\\s*") ){
2078
            		followingText = followingTextNode.getTextContent();
2079
            	}
2080
        	}
2081

    
2082
        	//traverse nodes
2083
            if (childName.equalsIgnoreCase("#text")) {
2084
                freetext = childNode.getTextContent().trim();
2085
                if (usedFollowingTextPrefix != null && freetext.startsWith(usedFollowingTextPrefix)){
2086
                	freetext = freetext.substring(usedFollowingTextPrefix.length());
2087
                }
2088
                usedFollowingTextPrefix = null;  //reset
2089
            }else if (childName.equalsIgnoreCase("tax:collection_event")) {
2090
                //                System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
2091
                extractMaterialsDirect(childNode, acceptedTaxon, refMods, "collection", currentMyName.getTaxonNameBase());
2092
            }else if(childName.equalsIgnoreCase("tax:name")){
2093
                INonViralName nameToBeFilled;
2094
                //System.out.println("HANDLE FIRST NAME OF THE LIST");
2095
                if(!containsSynonyms){
2096
                	wasSynonym = false;
2097

    
2098
                	//System.out.println("I : "+i);
2099
                    currentMyName = new MyName(false);
2100
                    try {
2101
                        currentMyName = extractScientificName(childNode, refMods, followingText);
2102
                        treatmentMainName = currentMyName.getNewName();
2103
                        originalTreatmentName = currentMyName.getOriginalName();
2104

    
2105
                    } catch (TransformerFactoryConfigurationError e1) {
2106
                        throw new RuntimeException(e1);
2107
                    } catch (TransformerException e1) {
2108
                    	throw new RuntimeException(e1);
2109
                    }
2110

    
2111
                    if (currentMyName.getRank().equals(Rank.UNKNOWN_RANK()) || currentMyName.getRank().isLower(state2.getConfig().getMaxRank()) || currentMyName.getRank().equals(state2.getConfig().getMaxRank())){
2112
                        maxRankRespected=true;
2113

    
2114
                        nameToBeFilled=currentMyName.getTaxonNameBase();
2115

    
2116
                        //   acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
2117
                        acceptedTaxon=currentMyName.getTaxon();
2118
                        //System.out.println("TreatmentName "+treatmentMainName+" - "+acceptedTaxon);
2119

    
2120

    
2121
                        boolean statusMatch=false;
2122
                        if(acceptedTaxon !=null ){
2123
                            acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2124
                            statusMatch=compareStatus(acceptedTaxon, statusType);
2125
                            //System.out.println("statusMatch: "+statusMatch);
2126
                        }
2127
                        if (acceptedTaxon ==null || (acceptedTaxon != null && !statusMatch)){
2128

    
2129
                            nameToBeFilled=currentMyName.getTaxonNameBase();
2130
                            if (nameToBeFilled != null){
2131
                                if (!originalTreatmentName.isEmpty()) {
2132
                                    TaxonNameDescription td = TaxonNameDescription.NewInstance();
2133
                                    td.setTitleCache(originalTreatmentName, true);
2134
                                    nameToBeFilled.addDescription(td);
2135
                                }
2136

    
2137
                                if(statusType != null) {
2138
                                    nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
2139
                                }
2140
                                if(newNameStatus != null){
2141
                                	nameToBeFilled.setAppendedPhrase(newNameStatus);
2142
                                }
2143
                                sourceHandler.addSource(refMods, nameToBeFilled);
2144

    
2145
                                if (nameToBeFilled.getNomenclaturalReference() == null) {
2146
                                    acceptedTaxon= Taxon.NewInstance(nameToBeFilled,refMods);
2147
                                    //System.out.println("NEW ACCEPTED HERE "+nameToBeFilled);
2148
                                }
2149
                                else {
2150
                                    acceptedTaxon= Taxon.NewInstance(nameToBeFilled,(Reference) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
2151
                                    //System.out.println("NEW ACCEPTED HERE2 "+nameToBeFilled);
2152
                                }
2153

    
2154
                                sourceHandler.addSource(refMods, acceptedTaxon);
2155

    
2156
                                if(!state2.getConfig().doKeepOriginalSecundum()) {
2157
                                    acceptedTaxon.setSec(state2.getConfig().getSecundum());
2158
                                    //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2159
                                    //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2160
                                }
2161

    
2162
                                if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2163
                                    setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2164
                                }
2165

    
2166

    
2167
                                importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2168
                                acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2169
                            }
2170

    
2171
                        }else{
2172
                            acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2173
                            Set<IdentifiableSource> sources = acceptedTaxon.getSources();
2174
                            boolean sourcelinked=false;
2175
                            for (IdentifiableSource source:sources){
2176
                                if (source.getCitation().getTitleCache().equalsIgnoreCase(refMods.getTitleCache())) {
2177
                                    sourcelinked=true;
2178
                                }
2179
                            }
2180
                            if (!state2.getConfig().doKeepOriginalSecundum()) {
2181
                                acceptedTaxon.setSec(state2.getConfig().getSecundum());
2182
                                //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2183
                                //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2184
                            }
2185
                            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2186

    
2187
                            if (!sourcelinked){
2188
                                sourceHandler.addSource(refMods, acceptedTaxon);
2189
                            }
2190
                            if (!sourcelinked || !state2.getConfig().doKeepOriginalSecundum()){
2191

    
2192
                                if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2193
                                    //FIXME are these identifiers really related to taxa, not names? Exiting LSIDs come from Zoobank, urn:lsid:biosci.ohio-state.edu:osuc_concepts:134826 (Ants)
2194
                                	setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2195
                                }
2196
                                importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2197
                            }
2198
                        }
2199
                    }else{
2200
                        maxRankRespected=false;
2201
                    }
2202
                    containsSynonyms=true;  //all folowing names are handled as synonyms
2203
                }else{
2204
                    try{
2205
                        extractSynonyms(childNode, acceptedTaxon, refMods, followingText);
2206
                        wasSynonym = true;
2207

    
2208
                    }catch(NullPointerException e){
2209
                        logger.warn("null pointer exception, the accepted taxon might be null");
2210
                    }
2211
                }
2212
                containsSynonyms=true;
2213
            }else if (childName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
2214
                reloadClassification();
2215
                //extract the References within the document
2216
                extractReferences(childNode,nametosave,acceptedTaxon,refMods);
2217
            }else if (childName.equalsIgnoreCase("tax:bibref")){
2218
            	logger.warn(childName + " still preliminary");
2219

    
2220
            	INonViralName currentName = currentMyName == null ? null : currentMyName.getTaxonNameBase();
2221
            	boolean handled = addFollowingTextToName (currentName, childNode.getTextContent() );
2222
            	if (! handled){
2223
            		setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2224
            	}
2225
            }else{
2226
            	logger.warn(childName + " not yet handled");
2227
            }
2228
            if(!stringIsEmpty(freetext.trim())) {;
2229
                if (! freetext.matches("\\d\\.?")){
2230
                    INonViralName currentName = currentMyName == null ? null : currentMyName.getTaxonNameBase();
2231
                	boolean handled = false;
2232
                	if (currentName != null && !wasSynonym){
2233
                		handled = addFollowingTextToName (currentName, childNode.getTextContent() );
2234
                	}
2235
                	if (! handled){
2236
                		setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2237
                	}
2238
                }
2239

    
2240
                 freetext = "";
2241
            }
2242

    
2243
        }
2244
        //importer.getClassificationService().saveOrUpdate(classification);
2245
        return acceptedTaxon;
2246
    }
2247

    
2248

    
2249

    
2250

    
2251
	/**
2252
     * @return
2253
     */
2254

    
2255
    private boolean compareStatus(TaxonBase<?> t, NomenclaturalStatusType statusType) {
2256
        //logger.info("compareStatus");
2257
        boolean statusMatch=false;
2258
        //found one taxon
2259
        Set<NomenclaturalStatus> status = t.getName().getStatus();
2260
        if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
2261
            for (NomenclaturalStatus st:status){
2262
                NomenclaturalStatusType stype = st.getType();
2263
                if (stype.toString().equalsIgnoreCase(statusType.toString())) {
2264
                    statusMatch=true;
2265
                }
2266
            }
2267
        }
2268
        else{
2269
            if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
2270
                statusMatch=true;
2271
            }
2272
        }
2273
        return statusMatch;
2274
    }
2275

    
2276
    /**
2277
     * @param acceptedTaxon: the current acceptedTaxon
2278
     * @param ref: the current reference extracted from the MODS
2279
     * @return the parent for the current accepted taxon
2280
     */
2281
    /*  private Taxon createParent(Taxon acceptedTaxon, Reference ref) {
2282
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2283

    
2284
        List<Rank> rankList = new ArrayList<Rank>();
2285
        rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
2286

    
2287
        List<String> rankListStr = new ArrayList<String>();
2288
        for (Rank r:rankList) {
2289
            rankListStr.add(r.toString());
2290
        }
2291
        String r="";
2292
        String s = acceptedTaxon.getTitleCache();
2293
        Taxon tax = null;
2294
        if(!skippQuestion){
2295
            int addTaxon = askAddParent(s);
2296
            logger.info("ADD TAXON: "+addTaxon);
2297
            if (addTaxon == 0 ){
2298
                Taxon tmp = askParent(acceptedTaxon, classification);
2299
                if (tmp == null){
2300
                    s = askSetParent(s);
2301
                    r = askRank(s,rankListStr);
2302

    
2303
                    TaxonNameBase<?,?> nameToBeFilled = null;
2304
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2305
                        nameToBeFilled = TaxonNameFactory.NewBotanicalInstance(null);
2306
                    }
2307
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2308
                        nameToBeFilled = TaxonNameFactory.NewZoologicalInstance(null);
2309
                    }
2310
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2311
                        nameToBeFilled = TaxonNameFactory.NewBacterialInstance(null);
2312
                    }
2313
                    nameToBeFilled.setTitleCache(s, true);
2314
                    nameToBeFilled.setRank(getRank(r), true);
2315

    
2316
                    tax = Taxon.NewInstance(nameToBeFilled, ref);
2317
                }
2318
                else{
2319
                    tax=tmp;
2320
                }
2321

    
2322
                createParent(tax, ref);
2323
                //            logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
2324
                classification.addParentChild(tax, acceptedTaxon, ref, null);
2325
            }
2326
            else{
2327
                classification.addChildTaxon(acceptedTaxon, ref, null);
2328
                tax=acceptedTaxon;
2329
            }
2330
        } else{
2331
            classification.addChildTaxon(acceptedTaxon, ref, null);
2332
            tax=acceptedTaxon;
2333
        }
2334
        //        logger.info("RETURN: "+tax );
2335
        return tax;
2336

    
2337
    }
2338

    
2339
     */
2340

    
2341

    
2342
    private MyName  extractScientificNameSynonym(Node name, Reference refMods, String followingText) throws TransformerFactoryConfigurationError, TransformerException {
2343
        //System.out.println("extractScientificNameSynonym");
2344
        logger.info("extractScientificNameSynonym");
2345
        String[] rankListToPrint_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2346
        List<String> rankListToPrint = new ArrayList<String>();
2347
        for (String r : rankListToPrint_tmp) {
2348
            rankListToPrint.add(r.toLowerCase());
2349
        }
2350

    
2351
        Rank rank = Rank.UNKNOWN_RANK();
2352
        NodeList children = name.getChildNodes();
2353
        String originalName="";
2354
        String fullName = "";
2355
        String newName="";
2356
        String identifier="";
2357
        HashMap<String, String> atomisedMap = new HashMap<String, String>();
2358
        List<String> atomisedName= new ArrayList<String>();
2359

    
2360
        String rankStr = "";
2361
        Rank tmpRank ;
2362

    
2363
        String status= extractStatus(children);
2364

    
2365
        for (int i=0;i<children.getLength();i++){
2366
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:xmldata")){
2367
                NodeList atom = children.item(i).getChildNodes();
2368
                for (int k=0;k<atom.getLength();k++){
2369
                    identifier = extractIdentifier(identifier, atom.item(k));
2370
                    tmpRank = null;
2371
                    rankStr = atom.item(k).getNodeName().toLowerCase();
2372
                    //                    logger.info("RANKSTR:*"+rankStr+"*");
2373
                    if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2374
                        rankStr=atom.item(k).getTextContent().trim();
2375
                        tmpRank = getRank(rankStr);
2376
                    }
2377
                    //                    if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2378
                    if (tmpRank != null){
2379
                        rank=tmpRank;
2380
                    }
2381
                    atomisedMap.put(rankStr.toLowerCase(),atom.item(k).getTextContent().trim());
2382
                }
2383
                addAtomisedNamesToMap(rankListToPrint, rank, atomisedName, atom);
2384
            }
2385
            if(children.item(i).getNodeName().equalsIgnoreCase("#text") && !StringUtils.isBlank(children.item(i).getTextContent())){
2386
                //                logger.info("name non atomised: "+children.item(i).getTextContent());
2387
                fullName = children.item(i).getTextContent().trim();
2388
                //                logger.info("fullname: "+fullName);
2389
            }
2390
        }
2391
        originalName=fullName;
2392
        fullName = cleanName(fullName, atomisedName);
2393
        namesMap.put(fullName,atomisedMap);
2394

    
2395
        String atomisedNameStr = getAtomisedNameStr(atomisedName);
2396

    
2397
        if (fullName != null){
2398
            //            System.out.println("fullname: "+fullName);
2399
            //            System.out.println("atomised: "+atomisedNameStr);
2400
            if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2401
                if (skippQuestion){
2402
                    //                    String defaultN = "";
2403
                    if (atomisedNameStr.length()>fullName.length()) {
2404
                        newName=atomisedNameStr;
2405
                    } else {
2406
                        if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2407
                            newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2408
                        } else {
2409
                            newName=fullName;
2410
                        }
2411
                    }
2412
                } else {
2413
                    newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2414
                }
2415
            } else {
2416
                newName=fullName;
2417
            }
2418
        }
2419
        //not really needed
2420
        //        rank = askForRank(newName, rank, nomenclaturalCode);
2421
        //        System.out.println("atomised: "+atomisedMap.toString());
2422

    
2423
        //        String[] names = new String[5];
2424
        MyName myname = new MyName(true);
2425

    
2426
        //System.out.println("Handle "+newName+ "(rank: "+rank+")");
2427
        //        System.out.println(atomisedMap.keySet());
2428
        fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2429
        myname.setOriginalName(fullName);
2430
        myname.setNewName(newName);
2431
        myname.setRank(rank);
2432
        myname.setIdentifier(identifier);
2433
        myname.setStatus(status);
2434
        myname.setSource(refMods);
2435

    
2436
        //        boolean higherAdded=false;
2437

    
2438

    
2439
        boolean parseNameManually=false;
2440
        INonViralNameParser<?> parser = NonViralNameParserImpl.NewInstance();
2441
        ITaxonNameBase  nameToBeFilledTest ;
2442

    
2443
        //if selected the atomised version
2444
        if(newName==atomisedNameStr){
2445
            nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2446
            if (nameToBeFilledTest.hasProblem()){
2447
                addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2448
                nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode, rank);
2449
                if (nameToBeFilledTest.hasProblem()){
2450
                    addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2451
                    parseNameManually=true;
2452
                }
2453
            }
2454
        }else{
2455
            nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2456
            if (nameToBeFilledTest.hasProblem()){
2457
                addProblemNameToFile("fullversion",fullName, nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2458
                nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2459
                parseNameManually=true;
2460
                if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2461
                    addNameDifferenceToFile(originalName,atomisedNameStr);
2462
                }
2463
            }
2464
        }
2465

    
2466
        if(parseNameManually){
2467
            //System.out.println("DO IT MANUALLY");
2468
        	if (this.state2.getConfig().isUseOldUnparsedSynonymExtraction()){
2469
                createUnparsedSynonym(rank, newName, atomisedMap, myname);
2470
        	}else{
2471
        		createUnparsedSynonymNew(rank, newName, atomisedMap, myname, refMods);;
2472
        	}
2473
        } else{
2474
            //System.out.println("AUTOMATIC!");
2475
            //            createAtomisedTaxonString(newName, atomisedMap, myname);
2476
            myname.setParsedName(nameToBeFilledTest);
2477
            myname.buildTaxon();
2478
        }
2479
        //System.out.println("RETURN SYNONYM "+myname.getSyno().toString());
2480
        return myname;
2481
    }
2482

    
2483

    
2484
	/**
2485
     * @param name
2486
     * @throws TransformerFactoryConfigurationError
2487
     * @throws TransformerException
2488
     * @return a list of possible names
2489
     */
2490
    @SuppressWarnings({"rawtypes" })
2491
    private MyName extractScientificName(Node name, Reference refMods, String followingText) throws TransformerFactoryConfigurationError, TransformerException {
2492
        logger.info("extractScientificName");
2493

    
2494
        String[] rankListToPrintLowerCase_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2495
        List<String> rankListToPrint = Arrays.asList(rankListToPrintLowerCase_tmp);
2496

    
2497
        Rank rank = Rank.UNKNOWN_RANK();
2498
        NodeList children = name.getChildNodes();
2499
        String originalName = "";
2500
        String fullName = "";
2501
        String newName = "";
2502
        String identifier = "";
2503
        HashMap<String, String> atomisedMap = new HashMap<String, String>();
2504
        List<String> atomisedNameList= new ArrayList<String>();
2505

    
2506
        String status= extractStatus(children);
2507

    
2508
        for (int i=0;i<children.getLength();i++){
2509
        	Node nameChild = children.item(i);
2510
            if(nameChild.getNodeName().equalsIgnoreCase("tax:xmldata")){
2511
                NodeList xmlDataChildren = nameChild.getChildNodes();
2512
                for (int k=0;k<xmlDataChildren.getLength();k++){
2513
                	Node xmlDataChild = xmlDataChildren.item(k);
2514
                    identifier = extractIdentifier(identifier, xmlDataChild);
2515
                    String rankStr = xmlDataChild.getNodeName().toLowerCase();
2516
                    if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2517
                        rankStr=xmlDataChild.getTextContent().trim();
2518
                        Rank tmpRank = getRank(rankStr);
2519
                        if (tmpRank != null){
2520
                            rank=tmpRank;
2521
                        }
2522
                    }
2523
                    //                    if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2524

    
2525
                    atomisedMap.put(rankStr.toLowerCase(),xmlDataChild.getTextContent().trim());
2526
                }
2527
                addAtomisedNamesToMap(rankListToPrint, rank, atomisedNameList, xmlDataChildren);
2528
            }
2529
            else if(nameChild.getNodeName().equalsIgnoreCase("#text") && ! nameChild.getTextContent().matches("\\s*")){
2530
                //                logger.info("name non atomised: "+children.item(i).getTextContent());
2531
                fullName = nameChild.getTextContent().trim();
2532
                //                logger.info("fullname: "+fullName);
2533
            }
2534
        }
2535
        originalName=fullName;
2536
        fullName = cleanName(fullName, atomisedNameList);
2537
        namesMap.put(fullName,atomisedMap);
2538

    
2539
        String atomisedNameStr = getAtomisedNameStr(atomisedNameList);
2540

    
2541
        if (fullName != null){
2542
            if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2543
                if (skippQuestion){
2544
                    if (atomisedNameStr.length()>fullName.length()) {
2545
                        newName = atomisedNameStr;
2546
                    } else {
2547
                        if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2548
                            newName = askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
2549
                        } else {
2550
                            newName = fullName;
2551
                        }
2552
                    }
2553
                } else {
2554
                    newName=askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
2555
                }
2556
            } else {
2557
                newName=fullName;
2558
            }
2559
        }
2560
        //not really needed
2561
        //        rank = askForRank(newName, rank, nomenclaturalCode);
2562
        //        System.out.println("atomised: "+atomisedMap.toString());
2563

    
2564
        //        String[] names = new String[5];
2565
        MyName myname = new MyName(false);
2566

    
2567
        //System.out.println("\n\nBUILD "+newName+ "(rank: "+rank+")");
2568
        //        System.out.println(atomisedMap.keySet());
2569
        fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2570
        myname.setOriginalName(fullName);
2571
        myname.setNewName(newName);
2572

    
2573
        myname.setRank(rank);
2574
        myname.setIdentifier(identifier);
2575
        myname.setStatus(status);
2576
        myname.setSource(refMods);
2577

    
2578
        //        boolean higherAdded=false;
2579

    
2580

    
2581
        boolean parseNameManually=false;
2582
        INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2583
        ITaxonNameBase  nameToBeFilledTest = null;
2584

    
2585
        //if selected the atomised version
2586
        if(newName==atomisedNameStr){
2587
            nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2588
            if (nameToBeFilledTest.hasProblem()){
2589
        	    addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2590
                nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2591
                if (nameToBeFilledTest.hasProblem()){
2592
                    addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2593
                    parseNameManually=true;
2594
                }
2595
            }
2596
        }else{
2597
            nameToBeFilledTest = parseWithExtension(parser, fullName , rank, followingText, atomisedMap);
2598
            if (nameToBeFilledTest.hasProblem()){
2599
                addProblemNameToFile("fullversion",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2600
                nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2601
                parseNameManually=true;
2602
                if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2603
                    addNameDifferenceToFile(originalName,atomisedNameStr);
2604
                }
2605
            }
2606
        }
2607

    
2608
        //System.out.println("parseNameManually: "+parseNameManually);
2609
        if(parseNameManually){
2610
            createAtomisedTaxon(rank, newName, atomisedMap, myname);
2611
        }
2612
        else{
2613
            createAtomisedTaxonString(newName, atomisedMap, myname);
2614
            myname.setParsedName(nameToBeFilledTest);
2615
            //TODO correct handling of createIfNotExists
2616
           	myname.buildTaxon();
2617
        }
2618
        return myname;
2619

    
2620
    }
2621

    
2622
    private ITaxonNameBase parseWithExtension(INonViralNameParser parser, String atomisedNameStr, Rank rank, String followingText, HashMap<String, String> atomisedMap) {
2623
    	Object[] nameExtensionResult = getPossibleExtension(followingText, atomisedMap, nomenclaturalCode);
2624

    
2625
    	ITaxonNameBase name = parser.parseFullName(atomisedNameStr, nomenclaturalCode, rank);
2626
    	if (nameExtensionResult != null && nameExtensionResult[0] != null){
2627
    		String ext = (String)nameExtensionResult[0];
2628
    		ITaxonNameBase extName =parser.parseFullName(atomisedNameStr + " " + ext, nomenclaturalCode, rank);
2629
    		if (! extName.hasProblem()){
2630
    			name = extName;
2631
    			this.usedFollowingTextPrefix = ext;
2632
    			//TODO do we need to fill the atomisedMap at all?
2633
    			if ((Boolean)(nameExtensionResult[1])){
2634
    				//TODO
2635
    			}
2636
    			if ((Boolean)(nameExtensionResult[2])){
2637
    				//TODO BasionymYear etc.
2638
    				Integer origYear = ((ZoologicalName)name).getPublicationYear();
2639
    				if (origYear != null){
2640
        				atomisedMap.put(PUBLICATION_YEAR, origYear.toString());
2641
    				}
2642
    			}
2643
    		}
2644
    	}
2645
		return name;
2646
	}
2647

    
2648
	private Object[] getPossibleExtension(String followingText, HashMap<String, String> atomisedMap, NomenclaturalCode nomenclaturalCode) {
2649
		if (StringUtils.isBlank(followingText)){
2650
			return null;
2651
		}
2652

    
2653
    	boolean includeAuthor = true;
2654
    	boolean includeYear = false;
2655
		if (atomisedMap.containsKey("dwc:scientificnameauthorship")){
2656
			includeAuthor = false;
2657
		}
2658
    	if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2659
    		includeYear = true;
2660
    	}
2661
    	String patternStr = "";
2662
    	if (includeAuthor){
2663
    		patternStr += NonViralNameParserImplRegExBase.capitalWord;
2664
    	}
2665
    	if (includeYear){
2666
    		patternStr += "\\s*(,|\\s+)\\s*" + "(17|18|19|20)" + "\\d{2}" ;
2667
    	}
2668
    	String match = null;
2669
    	if (! patternStr.isEmpty()){
2670
    		Pattern pattern = Pattern.compile("^" + patternStr);
2671
    		Matcher matcher = pattern.matcher(followingText.trim());
2672
    		if (matcher.find()){
2673
    			match = matcher.group();
2674
    		}
2675
    	}
2676

    
2677
		return new Object[]{match, includeAuthor, includeYear};
2678
	}
2679

    
2680
	/**
2681
     * @param atomisedName
2682
     * @return
2683
     */
2684
    private String getAtomisedNameStr(List<String> atomisedName) {
2685
        //logger.info("getAtomisedNameStr");
2686
        String atomisedNameStr = StringUtils.join(atomisedName," ");
2687
        while(atomisedNameStr.contains("  ")) {
2688
            atomisedNameStr=atomisedNameStr.replace("  ", " ");
2689
        }
2690
        atomisedNameStr=atomisedNameStr.trim();
2691
        return atomisedNameStr;
2692
    }
2693

    
2694
    /**
2695
     * @param children
2696
     * @param status
2697
     * @return
2698
     */
2699
    private String extractStatus(NodeList children) {
2700
        logger.info("extractStatus");
2701
        String status="";
2702
        for (int i=0;i<children.getLength();i++){
2703
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:status") ||
2704
                    (children.item(i).getNodeName().equalsIgnoreCase("tax:namePart") &&
2705
                            children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("status"))){
2706
                status = children.item(i).getTextContent().trim();
2707
            }
2708
        }
2709
        return status;
2710
    }
2711

    
2712
    /**
2713
     * @param identifier
2714
     * @param atom
2715
     * @param k
2716
     * @return
2717
     */
2718
    private String extractIdentifier(String identifier, Node atom) {
2719
        //logger.info("extractIdentifier");
2720
        if (atom.getNodeName().equalsIgnoreCase("tax:xid")){
2721
            try{
2722
                identifier = atom.getAttributes().getNamedItem("identifier").getNodeValue();
2723
            }catch(Exception e){
2724
                System.out.println("pb with identifier, maybe empty");
2725
            }
2726
            try{
2727
                identifier+="__"+atom.getAttributes().getNamedItem("source").getNodeValue();
2728
            }catch(Exception e){
2729
                System.out.println("pb with identifier, maybe empty");
2730
            }
2731
        }
2732
        return identifier;
2733
    }
2734

    
2735
    /**
2736
     * @param rankListToPrint
2737
     * @param rank
2738
     * @param atomisedName
2739
     * @param atom
2740
     */
2741
    private void addAtomisedNamesToMap(List<String> rankListToPrint, Rank rank, List<String> atomisedName, NodeList atom) {
2742
        logger.info("addAtomisedNamesToMap");
2743
        for (int k=0;k<atom.getLength();k++){
2744
        	Node node = atom.item(k);
2745
        	String nodeName = node.getNodeName();
2746
            if (! nodeName.equalsIgnoreCase("dwc:taxonRank") ) {  //rank has been handled in higher method
2747
                if (nodeName.equalsIgnoreCase("dwc:subgenus") || nodeName.equalsIgnoreCase("dwcranks:subgenus")) {
2748
                    atomisedName.add("("+ node.getTextContent().trim()+")");
2749
                } else if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet") || nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2750
                       	if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet")){
2751
                            atomisedName.add("var. "+node.getTextContent().trim());
2752
                        }else if(nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2753
                            atomisedName.add("subsp. "+atom.item(k).getTextContent().trim());
2754
                        }
2755
                } else if(rankListToPrint.contains(nodeName.toLowerCase())) {
2756
                    atomisedName.add(node.getTextContent().trim());
2757
                } else{
2758
                    if (rank.isHigher(Rank.GENUS()) && (nodeName.indexOf("dwcranks:")>-1 || nodeName.indexOf("dwc:Family")>-1)) {
2759
                        atomisedName.add(node.getTextContent().trim());
2760
                    }else if (nodeName.equals("#text")){
2761
                    	String text = node.getTextContent();
2762
                    	if (StringUtils.isNotBlank(text)){
2763
                    		//TODO handle text
2764
                    		logger.warn("name xmldata contains text. This is unhandled");
2765
                    	}
2766
                    }else if (nodeName.matches("(?i)(dwc:Kingdom|dwc:Class|dwc:Order|dwc:Family)")){
2767
                    	//we currently do not use higher ranks information
2768
                    }else{
2769
                    	//TODO handle unhandled node
2770
                    	logger.warn("Unhandled node: " + nodeName);
2771
                    }
2772
                }
2773
            }
2774
        }
2775
    }
2776

    
2777
    /**
2778
     * @param fullName
2779
     * @param atomisedName
2780
     * @return
2781
     */
2782
    private String cleanName(String name, List<String> atomisedName) {
2783
        //logger.info("cleanName");
2784
        String fullName =name;
2785
        if (fullName != null){
2786
            fullName = fullName.replace("( ", "(");
2787
            fullName = fullName.replace(" )",")");
2788

    
2789
            if (fullName.trim().isEmpty()){
2790
                fullName=StringUtils.join(atomisedName," ");
2791
            }
2792

    
2793
            while(fullName.contains("  ")) {
2794
                fullName=fullName.replace("  ", " ");
2795
                //            logger.info("while");
2796
            }
2797
            fullName=fullName.trim();
2798
        }
2799
        return fullName;
2800
    }
2801

    
2802
    /**
2803
     * @param rank
2804
     * @param fullName
2805
     * @param atomisedMap
2806
     * @param myname
2807
     * @return
2808
     */
2809
    private String extractAuthorFromNames(Rank rank, String name, HashMap<String, String> atomisedMap, MyName myname) {
2810
        logger.info("extractAuthorFromNames");
2811
        String fullName=name;
2812
        if (atomisedMap.get("dwc:scientificnameauthorship") == null && fullName!=null){
2813
            //            System.out.println("rank : "+rank.toString());
2814
            if(rank.isHigher(Rank.SPECIES())){
2815
                try{
2816
                    String author=null;
2817
                    if(atomisedMap.get("dwcranks:subgenus") != null) {
2818
                        author = fullName.split(atomisedMap.get("dwcranks:subgenus"))[1].trim();
2819
                    }
2820
                    if(atomisedMap.get("dwc:subgenus") != null) {
2821
                        author = fullName.split(atomisedMap.get("dwc:subgenus"))[1].trim();
2822
                    }
2823
                    if(author == null) {
2824
                        if(atomisedMap.get("dwc:genus") != null) {
2825
                            author = fullName.split(atomisedMap.get("dwc:genus"))[1].trim();
2826
                        }
2827
                    }
2828
                    if(author != null){
2829
                        fullName = fullName.substring(0, fullName.indexOf(author));
2830
                        author=author.replaceAll(",","").trim();
2831
                        myname.setAuthor(author);
2832
                    }
2833
                }catch(Exception e){
2834
                    //could not extract the author
2835
                }
2836
            }
2837
            if(rank.equals(Rank.SPECIES())){
2838
                try{
2839
                    String author=null;
2840
                    if(author == null) {
2841
                        if(atomisedMap.get("dwc:species") != null) {
2842
                            String[] t = fullName.split(atomisedMap.get("dwc:species"));
2843
                            //                            System.out.println("NB ELEMENTS "+t.length +"fullName "+fullName+", "+atomisedMap.get("dwc:species"));
2844
                            author = fullName.split(atomisedMap.get("dwc:species"))[1].trim();
2845
                            //                            System.out.println("AUTEUR "+author);
2846
                        }
2847
                    }
2848
                    if(author != null){
2849
                        fullName = fullName.substring(0, fullName.indexOf(author));
2850
                        author=author.replaceAll(",","").trim();
2851
                        myname.setAuthor(author);
2852
                    }
2853
                }catch(Exception e){
2854
                    //could not extract the author
2855
                }
2856
            }
2857
        }else{
2858
            myname.setAuthor(atomisedMap.get("dwc:scientificnameauthorship"));
2859
        }
2860
        return fullName;
2861
    }
2862

    
2863
    /**
2864
     * @param newName
2865
     * @param atomisedMap
2866
     * @param myname
2867
     */
2868
    private void createAtomisedTaxonString(String newName, HashMap<String, String> atomisedMap, MyName myname) {
2869
        logger.info("createAtomisedTaxonString "+atomisedMap);
2870
        if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
2871
            myname.setFamilyStr(atomisedMap.get("dwc:family"));
2872
        }
2873
        if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY())){
2874
            myname.setSubfamilyStr(atomisedMap.get("dwcranks:subfamily"));
2875
        }
2876
        if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
2877
            myname.setTribeStr(atomisedMap.get("dwcranks:tribe"));
2878
        }
2879
        if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
2880
            myname.setSubtribeStr(atomisedMap.get("dwcranks:subtribe"));
2881
        }
2882
        if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
2883
            myname.setGenusStr(atomisedMap.get("dwc:genus"));
2884
        }
2885
        if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2886
            myname.setSubgenusStr(atomisedMap.get("dwcranks:subgenus"));
2887
        }
2888
        if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2889
            myname.setSubgenusStr(atomisedMap.get("dwc:subgenus"));
2890
        }
2891
        if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
2892
            String n=newName;
2893
            if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2894
                n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2895
                n=n.replace("subsp.","");
2896
            }
2897
            if(atomisedMap.get("dwc:subspecies") != null) {
2898
                n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2899
                n=n.replace("subsp.","");
2900
            }
2901
            if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2902
                n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2903
                n=n.replace("var.","");
2904
                n=n.replace("v.","");
2905
            }
2906
            if(atomisedMap.get("dwcranks:formepithet") != null) {
2907
                //TODO
2908
                System.out.println("TODO FORMA");
2909
                n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
2910
                n=n.replace("forma","");
2911
            }
2912
            n=n.trim();
2913
            String author = myname.getAuthor();
2914
            if(n.split(" ").length>2){
2915

    
2916
                String n2=n.split(" ")[0]+" "+n.split(" ")[1];
2917
                String a= "";
2918
                try{
2919
                    a=n.split(n2)[1].trim();
2920
                }catch(Exception e){
2921
                    logger.info("no author in "+n+"?");}
2922

    
2923
                myname.setAuthor(a);
2924
                //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
2925
                n=n2;
2926

    
2927
            }
2928

    
2929
            myname.setSpeciesStr(atomisedMap.get("dwc:species"));
2930
            myname.setAuthor(author);
2931
        }
2932
        if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
2933
            myname.setSubspeciesStr(atomisedMap.get("dwc:subspecies"));
2934
        }
2935
        if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
2936
            myname.setSubspeciesStr(atomisedMap.get("dwc:infraspecificepithet"));
2937
        }
2938
        if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
2939
            myname.setVarietyStr(atomisedMap.get("dwcranks:varietyepithet"));
2940
        }
2941
        if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
2942
            myname.setFormStr(atomisedMap.get("dwcranks:formepithet"));
2943
        }
2944
        if (atomisedMap.get(PUBLICATION_YEAR) != null){
2945
        	myname.setPublicationYear(Integer.valueOf(atomisedMap.get(PUBLICATION_YEAR)));
2946
        }
2947
    }
2948

    
2949
    /**
2950
     * @see #createUnparsedSynonymNew(Rank, String, HashMap, MyName)
2951
     * @param rank
2952
     * @param newName
2953
     * @param atomisedMap
2954
     * @param myname
2955
     */
2956
    private void createUnparsedSynonym(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
2957
        logger.info("createSynonym");
2958
        //System.out.println("createsynonym");
2959
        if(rank.equals(Rank.UNKNOWN_RANK())){
2960
            myname.setNotParsableTaxon(newName);
2961
        }else{
2962
	        if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY()) && rank.equals(Rank.FAMILY())){
2963
	            myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
2964
	        }
2965
	        if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY()) && rank.equals(Rank.SUBFAMILY())){
2966
	            myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
2967
	        }
2968
	        if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE()) && rank.equals(Rank.TRIBE())){
2969
	            myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
2970
	        }
2971
	        if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE()) && rank.equals(Rank.SUBTRIBE())){
2972
	            myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
2973
	        }
2974
	        if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS()) && rank.equals(Rank.GENUS())){
2975
	            myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
2976
	        }
2977
	        if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
2978
	            myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
2979
	        }
2980
	        if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
2981
	            myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
2982
	        }
2983
	        if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES()) && rank.equals(Rank.SPECIES())){
2984
	            String n=newName;
2985
	            if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2986
	                n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2987
	                n=n.replace("subsp.","");
2988
	            }
2989
	            if(atomisedMap.get("dwc:subspecies") != null) {
2990
	                n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2991
	                n=n.replace("subsp.","");
2992
	            }
2993
	            if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2994
	                n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2995
	                n=n.replace("var.","");
2996
	                n=n.replace("v.","");
2997
	            }
2998
	            if(atomisedMap.get("dwcranks:formepithet") != null) {
2999
	                //TODO
3000
	                //System.out.println("TODO FORMA");
3001
	                n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3002
	                n=n.replace("forma","");
3003
	            }
3004
	            n=n.trim();
3005
	            String author = myname.getAuthor();
3006
	            if(n.split(" ").length>2){
3007

    
3008
	                String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3009
	                String a="";
3010
	                try{
3011
	                    a= n.split(n2)[1].trim();
3012
	                }catch(Exception e){logger.info("no author in "+n);}
3013
	                myname.setAuthor(a);
3014
	                //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3015
	                n=n2;
3016

    
3017
	            }
3018
	            Taxon species = myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank);
3019
	            myname.setSpecies(species);
3020
	            myname.setAuthor(author);
3021
	        }
3022
	        if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3023
	            myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3024
	        }
3025
	        if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3026
	            myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3027
	        }
3028
	        if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY()) && rank.equals(Rank.VARIETY())){
3029
	            myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3030
	        }
3031
	        if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM()) && rank.equals(Rank.FORM())){
3032
	            myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3033
	        }
3034
        }
3035

    
3036
    }
3037

    
3038

    
3039
    /**
3040
     * @param refMods
3041
     * @see #createUnparsedSynonym(Rank, String, HashMap, MyName)
3042
     * the original TaxonXImport extracted Synonyms by creating acc Taxa with partial names
3043
     * I (AM) do not understand this but don't want to destroy code which maybe works in some cases) there
3044
     * I created this switch for old
3045
     * for Spiders the new version is preferred
3046
     */
3047
    private void createUnparsedSynonymNew(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname, Reference refMods) {
3048
        logger.info("createSynonym");
3049

    
3050
        INonViralName nameToBeFilled = this.getNonViralNameAccNomenclature();
3051
        //System.out.println("createsynonym");
3052
        if(rank.equals(Rank.UNKNOWN_RANK())){
3053
            //TODO
3054
        	myname.setNotParsableTaxon(newName);
3055

    
3056
        	nameToBeFilled.setTitleCache(newName, true);
3057
        }else{
3058
        	if(atomisedMap.get("dwc:genus") != null ){
3059
    			nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwc:genus"));
3060
	        }
3061
        	if (rank.isSupraGeneric()){
3062
        		if (atomisedMap.get("dwcranks:subtribe") != null ){
3063
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subtribe"));
3064
    	        }else if (atomisedMap.get("dwcranks:subtribe") != null ){
3065
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subtribe"));
3066
    	        }else if (atomisedMap.get("dwcranks:tribe") != null ){
3067
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:tribe"));
3068
    	        }else if (atomisedMap.get("dwcranks:subfamily") != null ){
3069
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subfamily"));
3070
    	        }else if (atomisedMap.get("dwc:family") != null ){
3071
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwc:family"));
3072
        	    }else{
3073
        	    	logger.warn("Supra generic rank not yet handled or atomisation not available");
3074
        	    }
3075
        	}
3076
        	if (atomisedMap.get("dwcranks:subgenus") != null){
3077
        		nameToBeFilled.setInfraGenericEpithet(atomisedMap.get("dwcranks:subgenus"));
3078
        	}
3079
        	if (atomisedMap.get("dwc:subgenus") != null){
3080
        		nameToBeFilled.setInfraGenericEpithet(atomisedMap.get("dwc:subgenus"));
3081
        	}
3082
        	if (atomisedMap.get("dwc:species") != null){
3083
        		nameToBeFilled.setSpecificEpithet(atomisedMap.get("dwc:species"));
3084
        	}
3085
        	if (atomisedMap.get("dwcranks:formepithet") != null){
3086
        		nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwcranks:formepithet"));
3087
        	}else if (atomisedMap.get("dwcranks:varietyepithet") != null){
3088
        		nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwcranks:varietyepithet"));
3089
        	}else if (atomisedMap.get("dwc:infraspecificepithet") != null){
3090
        		nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwc:infraspecificepithet"));
3091
        	}else if (atomisedMap.get("dwc:subspecies") != null){
3092
        		nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwc:subspecies"));
3093
        	}
3094
            Reference sec = sourceUrlRef;
3095
            if(!state2.getConfig().doKeepOriginalSecundum()){
3096
                sec = state2.getConfig().getSecundum();
3097
            }
3098
        	Synonym syn = Synonym.NewInstance(nameToBeFilled, sec);
3099
//        	sourceHandler.addSource(refMods, syn);
3100
        	myname.setSyno(syn);
3101
        	myname.setSynonym(true);
3102
        }
3103
	}
3104

    
3105
    /**
3106
     * @param rank
3107
     * @param newName
3108
     * @param atomisedMap
3109
     * @param myname
3110
     */
3111
    private void createAtomisedTaxon(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
3112
        logger.info("createAtomisedTaxon "+atomisedMap);
3113
        if(rank.equals(Rank.UNKNOWN_RANK())){
3114
            myname.setNotParsableTaxon(newName);
3115
        }
3116
        else{
3117
            if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
3118
                myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
3119
            }
3120
            if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY())){
3121
                myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
3122
            }
3123
            if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
3124
                myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
3125
            }
3126
            if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
3127
                myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
3128
            }
3129
            if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
3130
                myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
3131
            }
3132
            if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3133
                myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
3134
            }
3135
            if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3136
                myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
3137
            }
3138
            if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
3139
                String n=newName;
3140
                if(atomisedMap.get("dwc:infraspecificepithet") != null) {
3141
                    n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
3142
                    n=n.replace("subsp.","");
3143
                }
3144
                if(atomisedMap.get("dwc:subspecies") != null) {
3145
                    n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
3146
                    n=n.replace("subsp.","");
3147
                }
3148
                if(atomisedMap.get("dwcranks:varietyepithet") != null) {
3149
                    n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
3150
                    n=n.replace("var.","");
3151
                    n=n.replace("v.","");
3152
                }
3153
                if(atomisedMap.get("dwcranks:formepithet") != null) {
3154
                    //TODO
3155
                    //System.out.println("TODO FORMA");
3156
                    n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3157
                    n=n.replace("forma","");
3158
                }
3159
                n=n.trim();
3160
                String author = myname.getAuthor();
3161
                if(n.split(" ").length>2){
3162
                    String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3163
                    String a="";
3164
                    try{
3165
                        a= n.split(n2)[1].trim();
3166
                    }catch(Exception e){logger.info("no author  in "+n);}
3167
                    myname.setAuthor(a);
3168
                    //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3169
                    n=n2;
3170

    
3171
                }
3172

    
3173
                myname.setSpecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank));
3174
                myname.setAuthor(author);
3175
            }
3176
            if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3177
                myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3178
            }
3179
            if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3180
                myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3181
            }
3182
            if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
3183
                myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3184
            }
3185
            if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
3186
                myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3187
            }
3188
        }
3189
    }
3190

    
3191
    /**
3192
     * @return
3193
     */
3194
    private boolean checkRankValidForImport(Rank currentRank) {
3195
        //logger.info("checkRankValidForImport");
3196
        return currentRank.isLower(state2.getConfig().getMaxRank()) || currentRank.equals(state2.getConfig().getMaxRank());
3197
    }
3198

    
3199

    
3200

    
3201
    /**
3202
     * @param classification2
3203
     */
3204
    public void updateClassification(Classification classification2) {
3205
        //logger.info("updateClassification");
3206
        classification = classification2;
3207
    }
3208

    
3209

    
3210

    
3211
    public class MyName {
3212
        /**
         * Creates an empty name holder.
         *
         * @param isSynonym {@code true} if the name is to be handled as a synonym
         */
        public MyName(boolean isSynonym) {
            this.isSynonym = isSynonym;
        }
3219

    
3220
        String originalName="";
3221
        String newName="";
3222
        Rank rank=Rank.UNKNOWN_RANK();
3223
        String identifier="";
3224
        String status="";
3225
        String author=null;
3226

    
3227
        TaxonNameBase<?,?> taxonNameBase;
3228

    
3229
        Reference refMods ;
3230

    
3231
        Taxon family,subfamily,tribe,subtribe,genus,subgenus,species,subspecies, variety,form;
3232
        INonViralName familyName, subfamilyName, tribeName,subtribeName,genusName,subgenusName,speciesName,subspeciesName;
3233
        String familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr;
3234
        Integer publicationYear;
3235

    
3236

    
3237
		Taxon higherTaxa;
3238
        Rank higherRank;
3239
        private Taxon taxon;
3240
        private Synonym syno;
3241

    
3242
        /**
3243
         * @return the syno
3244
         */
3245
        public Synonym getSyno() {
3246
            return syno;
3247
        }
3248

    
3249
        @Override
3250
        public String toString(){
3251
            List<String> tot=new ArrayList<String>();
3252
            String[] n= {familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr};
3253
            for (String elt:n){
3254
                if (!StringUtils.isEmpty(elt)) {
3255
                    tot.add(elt);
3256
                } else {
3257
                    tot.add("*");
3258
                }
3259
            }
3260
            return StringUtils.join(tot," ");
3261
        }
3262
        /**
3263
         * @param syno the syno to set
3264
         */
3265
        public void setSyno(Synonym syno) {
3266
            this.syno = syno;
3267
        }
3268

    
3269
        boolean isSynonym=false;
3270

    
3271
        /**
3272
         * @return the isSynonym
3273
         */
3274
        public boolean isSynonym() {
3275
            return isSynonym;
3276
        }
3277

    
3278
        /**
3279
         * @param isSynonym the isSynonym to set
3280
         */
3281
        public void setSynonym(boolean isSynonym) {
3282
            this.isSynonym = isSynonym;
3283
        }
3284

    
3285
        public void setSource(Reference re){
3286
            refMods=re;
3287
        }
3288

    
3289
        /**
3290
         * @param string
3291
         */
3292
        public void setFormStr(String string) {
3293
            this.formStr=string;
3294

    
3295
        }
3296
        /**
3297
         * @param string
3298
         */
3299
        public void setVarietyStr(String string) {
3300
            this.varietyStr=string;
3301

    
3302
        }
3303
        /**
3304
         * @param string
3305
         */
3306
        public void setSubspeciesStr(String string) {
3307
            this.subspeciesStr=string;
3308

    
3309
        }
3310
        /**
3311
         * @param string
3312
         */
3313
        public void setSpeciesStr(String string) {
3314
            this.speciesStr=string;
3315

    
3316
        }
3317
        /**
3318
         * @param string
3319
         */
3320
        public void setSubgenusStr(String string) {
3321
            this.subgenusStr=string;
3322

    
3323
        }
3324
        /**
3325
         * @param string
3326
         */
3327
        public void setGenusStr(String string) {
3328
            this.genusStr=string;
3329

    
3330
        }
3331
        /**
3332
         * @param string
3333
         */
3334
        public void setSubtribeStr(String string) {
3335
            this.subtribeStr=string;
3336

    
3337
        }
3338
        /**
3339
         * @param string
3340
         */
3341
        public void setTribeStr(String string) {
3342
            this.tribeStr=string;
3343

    
3344
        }
3345
        /**
3346
         * @param string
3347
         */
3348
        public void setSubfamilyStr(String string) {
3349
            this.subfamilyStr=string;
3350

    
3351
        }
3352
        /**
3353
         * @param string
3354
         */
3355
        public void setFamilyStr(String string) {
3356
            this.familyStr=string;
3357

    
3358
        }
3359
        /**
3360
         * @return the familyStr
3361
         */
3362
        public String getFamilyStr() {
3363
            return familyStr;
3364
        }
3365
        /**
3366
         * @return the subfamilyStr
3367
         */
3368
        public String getSubfamilyStr() {
3369
            return subfamilyStr;
3370
        }
3371
        /**
3372
         * @return the tribeStr
3373
         */
3374
        public String getTribeStr() {
3375
            return tribeStr;
3376
        }
3377
        /**
3378
         * @return the subtribeStr
3379
         */
3380
        public String getSubtribeStr() {
3381
            return subtribeStr;
3382
        }
3383
        /**
3384
         * @return the genusStr
3385
         */
3386
        public String getGenusStr() {
3387
            return genusStr;
3388
        }
3389
        /**
3390
         * @return the subgenusStr
3391
         */
3392
        public String getSubgenusStr() {
3393
            return subgenusStr;
3394
        }
3395
        /**
3396
         * @return the speciesStr
3397
         */
3398
        public String getSpeciesStr() {
3399
            return speciesStr;
3400
        }
3401
        /**
3402
         * @return the subspeciesStr
3403
         */
3404
        public String getSubspeciesStr() {
3405
            return subspeciesStr;
3406
        }
3407
        /**
3408
         * @return the formStr
3409
         */
3410
        public String getFormStr() {
3411
            return formStr;
3412
        }
3413
        /**
3414
         * @return the varietyStr
3415
         */
3416
        public String getVarietyStr() {
3417
            return varietyStr;
3418
        }
3419

    
3420
        public Integer getPublicationYear() {
3421
			return publicationYear;
3422
		}
3423

    
3424
		public void setPublicationYear(Integer publicationYear) {
3425
			this.publicationYear = publicationYear;
3426
		}
3427

    
3428
        /**
3429
         * @param newName2
3430
         */
3431
        public void setNotParsableTaxon(String newName2) {
3432
            //takes too much time
3433
            //            List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3434

    
3435
            NomenclaturalStatusType statusType = null;
3436
            if (!getStatus().isEmpty()){
3437
                try {
3438
                    statusType = nomStatusString2NomStatus(getStatus());
3439
                } catch (UnknownCdmTypeException e) {
3440
                    addProblematicStatusToFile(getStatus());
3441
                    logger.warn("Problem with status");
3442
                }
3443
            }
3444
            List<TaxonBase> tmpList = new ArrayList<>();
3445

    
3446
            Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, newName2, MatchMode.BEGINNING, null, null, null, null, null);
3447
            tmpList.addAll(taxontest.getRecords());
3448

    
3449
            //logger.info("tmpList returned: "+tmpList.size());
3450

    
3451

    
3452
            INonViralName identicName = null;
3453
            boolean foundIdentic=false;
3454
            TaxonBase<?> tmpTaxonBase=null;
3455
            //            Taxon tmpPartial=null;
3456
            for (TaxonBase<?> tmpb:tmpList){
3457
                if(tmpb !=null){
3458
                    TaxonNameBase<?,?> tnb =  tmpb.getName();
3459
                    Rank crank=null;
3460
                    if (tnb != null){
3461
                        if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(newName2) ){
3462
                            crank =tnb.getRank();
3463
                            if (crank !=null && rank !=null){
3464
                                if (crank.equals(rank)){
3465
                                	identicName = tnb;
3466
                                	if (isSynonym && tmpb.isInstanceOf(Synonym.class) || !isSynonym && tmpb.isInstanceOf(Taxon.class)){
3467
                                		foundIdentic=true;
3468
                                		tmpTaxonBase=tmpb;
3469
                               			break;
3470
                                	}
3471
                                }
3472
                            }
3473
                        }
3474
                    }
3475
                }
3476
            }
3477
            boolean statusMatch=false;
3478
            boolean appendedMatch=false;
3479
            if(tmpTaxonBase !=null && foundIdentic){
3480
                statusMatch=compareStatus(tmpTaxonBase, statusType);
3481
                if (!getStatus().isEmpty() && ! (tmpTaxonBase.getAppendedPhrase() == null)) {
3482
                    appendedMatch=tmpTaxonBase.getAppendedPhrase().equals(getStatus());
3483
                }
3484
                if (getStatus().isEmpty() && tmpTaxonBase.getAppendedPhrase() == null) {
3485
                    appendedMatch=true;
3486
                }
3487

    
3488
            }
3489
            if ((tmpTaxonBase == null || !foundIdentic) ||  (tmpTaxonBase != null && !statusMatch) ||  (tmpTaxonBase != null && !appendedMatch && !statusMatch)){
3490

    
3491
            	INonViralName tnb;
3492
            	if (identicName == null){
3493
            		tnb = getNonViralNameAccNomenclature();
3494
            		tnb.setRank(rank);
3495

    
3496
	                if(statusType != null) {
3497
	                    tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3498
	                }
3499
	                if(StringUtils.isNotBlank(getStatus())) {
3500
	                    tnb.setAppendedPhrase(getStatus());
3501
	                }
3502
	                tnb.setTitleCache(newName2,true);
3503
	                tmpTaxonBase = findMatchingTaxon(tnb,refMods);
3504
	            }else{
3505
            		tnb = identicName;
3506
            	}
3507

    
3508
                if(tmpTaxonBase==null){
3509
                    tmpTaxonBase = isSynonym ? Synonym.NewInstance(tnb, refMods) : Taxon.NewInstance(tnb, refMods);
3510
                    if(!state2.getConfig().doKeepOriginalSecundum()) {
3511
                        tmpTaxonBase.setSec(state2.getConfig().getSecundum());
3512
                    }
3513
                    //tmptaxonbase.setSec(refMods);
3514
                    if(!isSynonym) {
3515
                        classification.addChildTaxon((Taxon)tmpTaxonBase, null, null);
3516
                        sourceHandler.addSource(refMods, (Taxon)tmpTaxonBase);
3517
                    }
3518
                }
3519
            }
3520

    
3521
            tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3522
            if (author != null) {
3523
                if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
3524
                    setLSID(getIdentifier(), tmpTaxonBase);
3525
                    importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3526
                    tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3527
                }
3528
            }
3529
            TaxonNameBase<?,?> tnb = CdmBase.deproxy(tmpTaxonBase.getName(), TaxonNameBase.class);
3530

    
3531
            if(!isSynonym) {
3532
                this.taxon=(Taxon)tmpTaxonBase;
3533
            } else {
3534
                if (tmpTaxonBase instanceof Taxon){
3535
                	logger.warn("Incorrect status");
3536
                }
3537
            	this.syno=(Synonym)tmpTaxonBase;
3538
            }
3539

    
3540
            taxonNameBase = tnb;
3541

    
3542
        }
3543

    
3544
        /**
3545
         *
3546
         */
3547
        public void buildTaxon() {
3548
            //System.out.println("BUILD TAXON");
3549
            logger.info("buildTaxon");
3550
            NomenclaturalStatusType statusType = null;
3551
            if (!getStatus().isEmpty()){
3552
            	status = getStatus();
3553
            	String newNameStatus = newNameStatus(status);
3554
            	if (newNameStatus != null){
3555
            		taxonNameBase.setAppendedPhrase(newNameStatus);
3556
            	}else{
3557
            		try {
3558
            			statusType = nomStatusString2NomStatus(getStatus());
3559
            			taxonNameBase.addStatus(NomenclaturalStatus.NewInstance(statusType));
3560
            		} catch (UnknownCdmTypeException e) {
3561
            			addProblematicStatusToFile(getStatus());
3562
            			logger.warn("Problem with status");
3563
            		}
3564
            	}
3565
            }
3566
            importer.getNameService().save(taxonNameBase);
3567

    
3568
            TaxonBase<?> tmpTaxonBase;
3569
            if (!isSynonym) {
3570
                tmpTaxonBase =Taxon.NewInstance(taxonNameBase, refMods); //sec set null
3571
            }
3572
            else {
3573
                tmpTaxonBase =Synonym.NewInstance(taxonNameBase, refMods); //sec set null
3574
            }
3575
            boolean exist = false;
3576
            if (!isSynonym){
3577
	            for (TaxonNode node : classification.getAllNodes()){
3578
	                try{
3579
	                	Taxon nodeTaxon = node.getTaxon();
3580
	                	boolean titleMatches = nodeTaxon.getTitleCache().equalsIgnoreCase(tmpTaxonBase.getTitleCache());
3581
	                	boolean nomStatusMatches = compareStatus(node.getTaxon(), statusType);
3582
	                	boolean nodeNameReplaceable = checkNodeNameReplaceable(nodeTaxon, tmpTaxonBase);
3583
	                    if(titleMatches && nomStatusMatches) {
3584
	                    	if (!isSynonym) {
3585
	                    		tmpTaxonBase=CdmBase.deproxy(nodeTaxon, TaxonBase.class);
3586
	                            exist =true;
3587
	                        } else {
3588
	                            logger.info("Found the same name but from another type (taxon/synonym)");
3589
	                            TaxonNameBase<?,?> existingTnb = getTaxon().getName();
3590
                                tmpTaxonBase = Synonym.NewInstance(existingTnb, refMods);
3591
                                importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3592
                                exist =true;
3593
                            }
3594
	                    }else if (nodeNameReplaceable){
3595
	                    	nodeTaxon.setName(tmpTaxonBase.getName());
3596
	                    	tmpTaxonBase = nodeTaxon;
3597
	                    	exist = true;
3598
	                    }
3599
	                }catch(NullPointerException n){logger.warn(" A taxon is either null or its titlecache is null - ignore it?");}
3600
	            }
3601
            }
3602
            if (!exist){
3603

    
3604
                boolean insertAsExisting =false;
3605
                List<Taxon> existingTaxons=new ArrayList<Taxon>();
3606
                try {
3607
                    existingTaxons = getMatchingTaxa(taxonNameBase);
3608
                } catch (Exception e1) {
3609
                    e1.printStackTrace();
3610
                }
3611
                double similarityScore=0.0;
3612
                double similarityAuthor=-1;
3613
                String author1="";
3614
                String author2="";
3615
                String t1="";
3616
                String t2="";
3617
                for (Taxon bestMatchingTaxon : existingTaxons){
3618
                    //System.out.println("tnbase "+taxonnamebase.getTitleCache());
3619
                    //System.out.println("bestex "+bestMatchingTaxon.getTitleCache());
3620
                    if(taxonNameBase.getAuthorshipCache()!=null) {
3621
                    	author1=taxonNameBase.getAuthorshipCache();
3622
                    }
3623
                    try {
3624
                        if(bestMatchingTaxon.getName().getAuthorshipCache()!=null) {
3625
                            author2=bestMatchingTaxon.getName().getAuthorshipCache();
3626
                        }
3627
                    } catch (Exception e) {
3628
                        // TODO Auto-generated catch block
3629
                        e.printStackTrace();
3630
                    }
3631
                    try {
3632
                        t1=taxonNameBase.getTitleCache();
3633
                        if (author1!=null && !StringUtils.isEmpty(author1)) {
3634
                            t1=t1.split(Pattern.quote(author1))[0];
3635
                        }
3636
                    } catch (Exception e) {
3637
                        // TODO Auto-generated catch block
3638
                        e.printStackTrace();
3639
                    }
3640
                    try {
3641
                        t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
3642
                        if (author2!=null && !StringUtils.isEmpty(author2)) {
3643
                            t2=t2.split(Pattern.quote(author2))[0];
3644
                        }
3645
                    } catch (Exception e) {
3646
                        // TODO Auto-generated catch block
3647
                        e.printStackTrace();
3648
                    }
3649

    
3650
                    similarityScore=similarity(t1.trim(), t2.trim());
3651
                    //System.out.println("taxonscore "+similarityScore);
3652
                    similarityAuthor=similarity(author1.trim(), author2.trim());
3653
                    //System.out.println("authorscore "+similarityAuthor);
3654
                    insertAsExisting = compareAndCheckTaxon(taxonNameBase, refMods, similarityScore, bestMatchingTaxon, similarityAuthor);
3655
                    if(insertAsExisting) {
3656
                        tmpTaxonBase=bestMatchingTaxon;
3657
                        break;
3658
                    }
3659
                }
3660
                if ( !insertAsExisting ){
3661
                    if(!state2.getConfig().doKeepOriginalSecundum()) {
3662
                        tmpTaxonBase.setSec(state2.getConfig().getSecundum());
3663
                    }
3664

    
3665
                    //                    tmptaxonbase.setSec(refMods);
3666
                    if (taxonNameBase.getRank().equals(state2.getConfig().getMaxRank())) {
3667
                        //System.out.println("****************************"+tmptaxonbase);
3668
                        if (!isSynonym) {
3669
                            classification.addChildTaxon((Taxon)tmpTaxonBase, refMods, null);
3670
                        }
3671
                    } else{
3672
                        hierarchy = new HashMap<Rank, Taxon>();
3673
                        //System.out.println("LOOK FOR PARENT "+taxonnamebase.toString()+", "+tmptaxonbase.toString());
3674
                        if (!isSynonym){
3675
                            lookForParentNode(taxonNameBase,(Taxon)tmpTaxonBase, refMods,this);
3676
                            //System.out.println("HIERARCHY "+hierarchy);
3677
                            Taxon parent = buildHierarchy();
3678
                            if(!taxonExistsInClassification(parent,(Taxon)tmpTaxonBase)){
3679
                                if(parent !=null) {
3680
                                    classification.addParentChild(parent, (Taxon)tmpTaxonBase, refMods, null);
3681
                                } else {
3682
                                    classification.addChildTaxon((Taxon)tmpTaxonBase, refMods, null);
3683
                                }
3684
                                importer.getClassificationService().saveOrUpdate(classification);
3685
                            }
3686
                        }
3687
                        //                        Set<TaxonNode> nodeList = classification.getAllNodes();
3688
                        //                        for(TaxonNode tn:nodeList) {
3689
                        //                            System.out.println(tn.getTaxon());
3690
                        //                        }
3691
                    }
3692
                }
3693
                importer.getClassificationService().saveOrUpdate(classification);
3694
                 if(isSynonym) {
3695
                    try{
3696
                        Synonym castTest=CdmBase.deproxy(tmpTaxonBase, Synonym.class);
3697
                    }catch(Exception e){
3698
                        TaxonNameBase<?,?> existingTnb = tmpTaxonBase.getName();
3699
                        Synonym castTest = Synonym.NewInstance(existingTnb, refMods);
3700
                        importer.getTaxonService().saveOrUpdate(castTest);
3701
                        tmpTaxonBase=CdmBase.deproxy(castTest, Synonym.class);
3702
                    }
3703
                }
3704
            }
3705
            if(!isSynonym) {
3706
                taxon=CdmBase.deproxy(tmpTaxonBase, Taxon.class);
3707
            } else {
3708
                syno=CdmBase.deproxy(tmpTaxonBase, Synonym.class);
3709
            }
3710

    
3711
        }
3712

    
3713
		private boolean checkNodeNameReplaceable(Taxon nodeTaxon, TaxonBase<?> newTaxon) {
3714
			//TODO preliminary check
3715
			if (newTaxon.isInstanceOf(Synonym.class)){
3716
				return false;
3717
			}
3718
			INonViralName nodeName = nodeTaxon.getName();
3719
			INonViralName newName = newTaxon.getName();
3720
			if (nodeTaxon.getName() == null ||  newName == null){
3721
				return false;
3722
			}
3723
			if (nodeTaxon.getDescriptions().size() > 0 || nodeName.getDescriptions().size() > 0 || nodeName.getTypeDesignations().size() > 0 ){
3724
				return false;
3725
			}
3726
			boolean compare = true;
3727
			for (NomenclaturalStatus status : newName.getStatus() ){
3728
				compare &= compareStatus(nodeTaxon, status.getType());
3729
			}
3730
			if (! compare){
3731
				return false;
3732
			}
3733

    
3734
			if (nodeName.getNameCache() != null && nodeName.getNameCache().equals(newName.getNameCache())){
3735
				if (nodeName.getNameCache().equals(nodeName.getTitleCache())){
3736
					if (newName.getNameCache().length() < newName.getTitleCache().length()){
3737
						logger.warn("We still need to check, if node was automatically created via hierarchy creation: " + nodeName.getNameCache());
3738
						return true;
3739
					}
3740
				}
3741
			}
3742

    
3743
			return false;
3744
		}
3745

    
3746
		/**
3747
         *
3748
         */
3749
        private Taxon buildHierarchy() {
3750
            logger.info("buildHierarchy");
3751
            Taxon higherTaxon = null;
3752
            //add the maxRank as a root
3753
            if(hierarchy.containsKey(state2.getConfig().getMaxRank())){
3754
                Taxon ct=hierarchy.get(state2.getConfig().getMaxRank());
3755
                if(!taxonExistsInClassification(higherTaxon, ct)) {
3756
                   classification.addChildTaxon(ct, refMods, null);
3757
                }
3758
                higherTaxon = hierarchy.get(state2.getConfig().getMaxRank());
3759
                //                return higherTaxon;
3760
            }
3761
            //add the relation to the highertaxon, except if the current rank to add IS the maxRank
3762

    
3763
            //TODO higher Ranks
3764

    
3765
            if(hierarchy.containsKey(Rank.FAMILY()) && !state2.getConfig().getMaxRank().equals(Rank.FAMILY())){
3766
                higherTaxon=saveAndGetHigherTaxon(Rank.FAMILY(),higherTaxon);
3767
            }
3768
            if(hierarchy.containsKey(Rank.SUBFAMILY()) && !state2.getConfig().getMaxRank().equals(Rank.SUBFAMILY())){
3769
                higherTaxon=saveAndGetHigherTaxon(Rank.SUBFAMILY(),higherTaxon);
3770
            }
3771
            if(hierarchy.containsKey(Rank.TRIBE())&& !state2.getConfig().getMaxRank().equals(Rank.TRIBE())){
3772
                higherTaxon=saveAndGetHigherTaxon(Rank.TRIBE(),higherTaxon);
3773
            }
3774
            if(hierarchy.containsKey(Rank.SUBTRIBE())&& !state2.getConfig().getMaxRank().equals(Rank.SUBTRIBE())){
3775
                higherTaxon=saveAndGetHigherTaxon(Rank.SUBTRIBE(),higherTaxon);
3776
            }
3777
            if(hierarchy.containsKey(Rank.GENUS())&& !state2.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3778
                higherTaxon=saveAndGetHigherTaxon(Rank.GENUS(),higherTaxon);
3779
            }
3780
            if(hierarchy.containsKey(Rank.SUBGENUS())&& !state2.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3781
                higherTaxon=saveAndGetHigherTaxon(Rank.SUBGENUS(),higherTaxon);
3782
            }
3783
            importer.getClassificationService().saveOrUpdate(classification);
3784
            return higherTaxon;
3785
        }
3786

    
3787
        private Taxon saveAndGetHigherTaxon(Rank r, Taxon higherTaxon){
3788
            Taxon ct=hierarchy.get(r);
3789
            if(!taxonExistsInClassification(higherTaxon,ct )) {
3790
                if(higherTaxon != null && ct!=null) {
3791
                    classification.addParentChild(higherTaxon, ct, refMods, null);
3792
                } else
3793
                    if(higherTaxon == null && ct !=null) {
3794
                        classification.addChildTaxon(ct, refMods, null);
3795
                }
3796
            }
3797
            return ct;
3798
        }
3799

    
3800
        private boolean taxonExistsInClassification(Taxon parent, Taxon child){
3801
            logger.info("taxonExistsInClassification");
3802
            //            System.out.println("LOOK IF TAXA EXIST "+parent+", "+child);
3803
            boolean found=false;
3804
            if(parent !=null){
3805
                for (TaxonNode p : classification.getAllNodes()){
3806
                    if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
3807
                        for (TaxonNode c : p.getChildNodes()) {
3808
                            if (c.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3809
                                found=true;
3810
                                break;
3811
                            }
3812
                        }
3813
                    }
3814
                }
3815
            }
3816
            else{
3817
                for (TaxonNode p : classification.getAllNodes()){
3818
                    if(p.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3819
                        found=true;
3820
                        break;
3821
                    }
3822
                }
3823
            }
3824
            //            System.out.println("LOOK IF TAXA EXIST? "+found);
3825
            return found;
3826
        }
3827
        /**
3828
         * @param nameToBeFilledTest
3829
         */
3830
        public void setParsedName(ITaxonNameBase nameToBeFilledTest) {
3831
            this.taxonNameBase = TaxonNameBase.castAndDeproxy(nameToBeFilledTest);
3832

    
3833
        }
3834
        //variety dwcranks:varietyEpithet
3835
        /**
3836
         * @return the author
3837
         */
3838
        public String getAuthor() {
3839
            return author;
3840
        }
3841
        /**
3842
         * @return
3843
         */
3844
        public Taxon getTaxon() {
3845
            return taxon;
3846
        }
3847
        /**
3848
         * @return
3849
         */
3850
        public TaxonNameBase<?,?> getTaxonNameBase() {
3851
            return taxonNameBase;
3852
        }
3853

    
3854
        /**
3855
         * @param findOrCreateTaxon
3856
         */
3857
        public void setForm(Taxon form) {
3858
            this.form=form;
3859

    
3860
        }
3861
        /**
3862
         * @param findOrCreateTaxon
3863
         */
3864
        public void setVariety(Taxon variety) {
3865
            this.variety=variety;
3866

    
3867
        }
3868
        /**
3869
         * @param string
3870
         * @return
3871
         */
3872
        @SuppressWarnings("rawtypes")
3873
        public Taxon findOrCreateTaxon(String partialname,String fullname, Rank rank, Rank globalrank) {
3874
            logger.info("findOrCreateTaxon");
3875
            sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
3876
            //takes too much time
3877
            //            List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3878
            //            logger.info("tmpList returned: "+tmpList.size());
3879

    
3880
            NomenclaturalStatusType statusType = null;
3881
            if (!getStatus().isEmpty()){
3882
                try {
3883
                    statusType = nomStatusString2NomStatus(getStatus());
3884
                } catch (UnknownCdmTypeException e) {
3885
                    addProblematicStatusToFile(getStatus());
3886
                    logger.warn("Problem with status");
3887
                }
3888
            }
3889

    
3890
            List<TaxonBase> tmpListFiltered = new ArrayList<TaxonBase>();
3891

    
3892
            Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, fullname, MatchMode.BEGINNING, null, null, null, null, null);
3893

    
3894
            tmpListFiltered.addAll(taxontest.getRecords());
3895
            taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, partialname, MatchMode.BEGINNING, null, null, null, null, null);
3896
            tmpListFiltered.addAll(taxontest.getRecords());
3897

    
3898
            //logger.info("tmpListFiltered returned: "+tmpListFiltered.size());
3899

    
3900
            boolean nameCorrected=false;
3901
            if (fullname.indexOf(partialname)<0) {
3902
                nameCorrected=true;
3903
            }
3904

    
3905
            boolean foundIdentic=false;
3906
            Taxon tmp=null;
3907
            for (TaxonBase tmpb:tmpListFiltered){
3908
                if(tmpb !=null){
3909
                    TaxonNameBase tnb =  tmpb.getName();
3910
                    Rank crank=null;
3911
                    if (tnb != null){
3912
                         if(globalrank.equals(rank) || (globalrank.isLower(Rank.SPECIES()) && rank.equals(Rank.SPECIES()))){
3913
                            if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(fullname) ){
3914
                                crank =tnb.getRank();
3915
                                if (crank !=null && rank !=null){
3916
                                    if (crank.equals(rank)){
3917
                                        foundIdentic=true;
3918
                                        try{
3919
                                            tmp=(Taxon)tmpb;
3920
                                            break;
3921
                                        }catch(Exception e){
3922
                                            e.printStackTrace();
3923
                                        }
3924
                                    }
3925
                                }
3926
                            }
3927
                            if(nameCorrected){ //for corrected names such as Anochetus -- A. blf-pat
3928
                                if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
3929
                                    crank =tnb.getRank();
3930
                                    if (crank !=null && rank !=null){
3931
                                        if (crank.equals(rank)){
3932
                                            foundIdentic=true;
3933
                                            try{
3934
                                                tmp=(Taxon)tmpb;
3935
                                                break;
3936
                                            }catch(Exception e){
3937
                                                e.printStackTrace();
3938
                                            }
3939
                                        }
3940
                                    }
3941
                                }
3942
                            }
3943
                        }
3944
                        else{
3945
                            if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
3946
                                crank =tnb.getRank();
3947
                                if (crank !=null && rank !=null){
3948
                                    if (crank.equals(rank)){
3949
                                        foundIdentic=true;
3950
                                        try{
3951
                                            tmp=(Taxon)tmpb;
3952
                                            break;
3953
                                        }catch(Exception e){
3954
                                            e.printStackTrace();
3955
                                        }
3956
                                    }
3957
                                }
3958
                            }
3959
                        }
3960
                    }
3961
                }
3962
            }
3963
            boolean statusMatch=false;
3964
            boolean appendedMatch=false;
3965
            if(tmp !=null && foundIdentic){
3966
                statusMatch=compareStatus(tmp, statusType);
3967
                if (!getStatus().isEmpty() && ! (tmp.getAppendedPhrase() == null)) {
3968
                    appendedMatch=tmp.getAppendedPhrase().equals(getStatus());
3969
                }
3970
                if (getStatus().isEmpty() && tmp.getAppendedPhrase() == null) {
3971
                    appendedMatch=true;
3972
                }
3973

    
3974
            }
3975
            if ((tmp == null || !foundIdentic) ||  (tmp != null && !statusMatch) ||  (tmp != null && !appendedMatch && !statusMatch)){
3976

    
3977
                INonViralName tnb = getNonViralNameAccNomenclature();
3978
                tnb.setRank(rank);
3979

    
3980
                if(statusType != null) {
3981
                    tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3982
                }
3983
                if(StringUtils.isNotBlank(getStatus())) {
3984
                    tnb.setAppendedPhrase(getStatus());
3985
                }
3986

    
3987
                if(rank.equals(Rank.UNKNOWN_RANK())){
3988
                    tnb.setTitleCache(fullname, true);
3989
                    //                    tnb.setGenusOrUninomial(fullname);
3990
                }
3991
                if(rank.isHigher(Rank.GENUS())) {
3992
                    tnb.setGenusOrUninomial(partialname);
3993
                }
3994

    
3995
                if(rank.isHigher(Rank.SPECIES())) {
3996
                    tnb.setTitleCache(partialname, true);
3997
                }
3998

    
3999
                if (rank.equals(globalrank) && author != null) {
4000

    
4001
                    tnb.setCombinationAuthorship(findOrCreateAuthor(author));
4002
                    if (getIdentifier() !=null && !getIdentifier().isEmpty()){
4003
                        Taxon taxonLSID = getTaxonByLSID(getIdentifier());
4004
                        if (taxonLSID !=null) {
4005
                            tmp=taxonLSID;
4006
                        }
4007
                    }
4008
                }
4009

    
4010
                if(tmp == null){
4011
                    if (rank.equals(Rank.FAMILY())) {
4012
                        tmp = buildFamily(tnb);
4013
                    }
4014
                    if (rank.equals(Rank.SUBFAMILY())) {
4015
                        tmp = buildSubfamily(tnb);
4016
                    }
4017
                    if (rank.equals(Rank.TRIBE())) {
4018
                        tmp = buildTribe(tnb);
4019
                    }
4020
                    if (rank.equals(Rank.SUBTRIBE())) {
4021
                        tmp = buildSubtribe(tnb);
4022
                    }
4023
                    if (rank.equals(Rank.GENUS())) {
4024
                        tmp = buildGenus(partialname, tnb);
4025
                    }
4026

    
4027
                    if (rank.equals(Rank.SUBGENUS())) {
4028
                        tmp = buildSubgenus(partialname, tnb);
4029
                    }
4030
                    if (rank.equals(Rank.SPECIES())) {
4031
                        tmp = buildSpecies(partialname, tnb);
4032
                    }
4033

    
4034
                    if (rank.equals(Rank.SUBSPECIES())) {
4035
                        tmp = buildSubspecies(partialname, tnb);
4036
                    }
4037

    
4038
                    if (rank.equals(Rank.VARIETY())) {
4039
                        tmp = buildVariety(fullname, partialname, tnb);
4040
                    }
4041

    
4042
                    if (rank.equals(Rank.FORM())) {
4043
                        tmp = buildForm(fullname, partialname, tnb);
4044
                    }
4045
                    if (tmp != null){
4046
                    	TaxonXTreatmentExtractor.this.sourceHandler.addSource(refMods, tmp);
4047
                    }
4048

    
4049
                    importer.getClassificationService().saveOrUpdate(classification);
4050
                }
4051

    
4052
            }
4053

    
4054
            tmp = CdmBase.deproxy(tmp, Taxon.class);
4055
            if (rank.equals(globalrank) && author != null) {
4056
                if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
4057
                    setLSID(getIdentifier(), tmp);
4058
                    importer.getTaxonService().saveOrUpdate(tmp);
4059
                    tmp = CdmBase.deproxy(tmp, Taxon.class);
4060
                }
4061
            }
4062

    
4063
            this.taxon=tmp;
4064

    
4065
            return tmp;
4066
        }
4067

    
4068
        /**
4069
         * @param tnb
4070
         * @return
4071
         */
4072
        private Taxon buildSubfamily(INonViralName tnb) {
4073
            Taxon tmp;
4074
            //            tnb.generateTitle();
4075
            tmp = findMatchingTaxon(tnb,refMods);
4076
            if(tmp ==null){
4077
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4078
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4079
                    tmp.setSec(state2.getConfig().getSecundum());
4080
                }
4081
                //                tmp.setSec(refMods);
4082
                //                sourceHandler.addSource(refMods, tmp);
4083
                if(family != null) {
4084
                    classification.addParentChild(family, tmp, null, null);
4085
                    higherRank=Rank.FAMILY();
4086
                    higherTaxa=family;
4087
                } else {
4088
                    //System.out.println("ADDCHILDTAXON SUBFAMILY "+tmp);
4089
                    classification.addChildTaxon(tmp, null, null);
4090
                }
4091
            }
4092
            return tmp;
4093
        }
4094
        /**
4095
         * @param tnb
4096
         * @return
4097
         */
4098
        private Taxon buildFamily(INonViralName tnb) {
4099
            Taxon tmp;
4100
            //            tnb.generateTitle();
4101
            tmp = findMatchingTaxon(tnb,refMods);
4102
            if(tmp ==null){
4103
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4104
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4105
                    tmp.setSec(state2.getConfig().getSecundum());
4106
                }
4107
                //                tmp.setSec(refMods);
4108
                //sourceHandler.addSource(refMods, tmp);
4109
                //System.out.println("ADDCHILDTAXON FAMILY "+tmp);
4110
                classification.addChildTaxon(tmp, null, null);
4111
            }
4112
            return tmp;
4113
        }
4114
        /**
4115
         * @param fullname
4116
         * @param tnb
4117
         * @return
4118
         */
4119
        private Taxon buildForm(String fullname, String partialname, INonViralName tnb) {
4120
            if (genusName !=null) {
4121
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4122
            }
4123
            if (subgenusName !=null) {
4124
                tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4125
            }
4126
            if(speciesName !=null) {
4127
                tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4128
            }
4129
            if(subspeciesName != null) {
4130
                tnb.setInfraSpecificEpithet(subspeciesName.getInfraSpecificEpithet());
4131
            }
4132
            if(partialname!= null) {
4133
                tnb.setInfraSpecificEpithet(partialname);
4134
            }
4135
             //TODO how to save form??
4136
            tnb.setTitleCache(fullname, true);
4137
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4138
            if(tmp ==null){
4139
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4140
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4141
                    tmp.setSec(state2.getConfig().getSecundum());
4142
                }
4143
                //                tmp.setSec(refMods);
4144
                //sourceHandler.addSource(refMods, tmp);
4145
                if (subspecies !=null) {
4146
                    classification.addParentChild(subspecies, tmp, null, null);
4147
                    higherRank=Rank.SUBSPECIES();
4148
                    higherTaxa=subspecies;
4149
                } else {
4150
                    if (species !=null) {
4151
                        classification.addParentChild(species, tmp, null, null);
4152
                        higherRank=Rank.SPECIES();
4153
                        higherTaxa=species;
4154
                    }
4155
                    else{
4156
                        //                        System.out.println("ADDCHILDTAXON FORM "+tmp);
4157
                        classification.addChildTaxon(tmp, null, null);
4158
                    }
4159
                }
4160
            }
4161
            return tmp;
4162
        }
4163
        /**
4164
         * @param fullname
4165
         * @param tnb
4166
         * @return
4167
         */
4168
        private Taxon buildVariety(String fullname, String partialname, INonViralName tnb) {
4169
            Taxon tmp;
4170
            if (genusName !=null) {
4171
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4172
            }
4173
            if (subgenusName !=null) {
4174
                tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4175
            }
4176
            if(speciesName !=null) {
4177
                tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4178
            }
4179
            if(subspeciesName != null) {
4180
                tnb.setInfraSpecificEpithet(subspeciesName.getSpecificEpithet());
4181
            }
4182
            if(partialname != null) {
4183
                tnb.setInfraSpecificEpithet(partialname);
4184
            }
4185
            //TODO how to save variety?
4186
            tnb.setTitleCache(fullname, true);
4187
            tmp = findMatchingTaxon(tnb,refMods);
4188
            if(tmp ==null){
4189
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4190
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4191
                    tmp.setSec(state2.getConfig().getSecundum());
4192
                }
4193
                //                tmp.setSec(refMods);
4194
                //sourceHandler.addSource(refMods, tmp);
4195
                if (subspecies !=null) {
4196
                    classification.addParentChild(subspecies, tmp, null, null);
4197
                    higherRank=Rank.SUBSPECIES();
4198
                    higherTaxa=subspecies;
4199
                } else {
4200
                    if(species !=null) {
4201
                        classification.addParentChild(species, tmp, null, null);
4202
                        higherRank=Rank.SPECIES();
4203
                        higherTaxa=species;
4204
                    }
4205
                    else{
4206
                        //System.out.println("ADDCHILDTAXON VARIETY "+tmp);
4207
                        classification.addChildTaxon(tmp, null, null);
4208
                    }
4209
                }
4210
            }
4211
            return tmp;
4212
        }
4213
        /**
4214
         * @param partialname
4215
         * @param tnb
4216
         * @return
4217
         */
4218
        private Taxon buildSubspecies(String partialname, INonViralName tnb) {
4219
            if (genusName !=null) {
4220
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4221
            }
4222
            if (subgenusName !=null) {
4223
                //                            System.out.println("SUB:"+subgenusName.getInfraGenericEpithet());
4224
                tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4225
            }
4226
            if(speciesName !=null) {
4227
                //                            System.out.println("SPE:"+speciesName.getSpecificEpithet());
4228
                tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4229
            }
4230
            tnb.setInfraSpecificEpithet(partialname);
4231
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4232
            if(tmp ==null){
4233
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4234
                if(!state2.getConfig().doKeepOriginalSecundum())
4235
                 {
4236
                    tmp.setSec(state2.getConfig().getSecundum());
4237
                //                tmp.setSec(refMods);
4238
                //sourceHandler.addSource(refMods, tmp);
4239
                }
4240

    
4241
                if(species != null) {
4242
                    classification.addParentChild(species, tmp, null, null);
4243
                    higherRank=Rank.SPECIES();
4244
                    higherTaxa=species;
4245
                }
4246
                else{
4247
                    //System.out.println("ADDCHILDTAXON SUBSPECIES "+tmp);
4248
                    classification.addChildTaxon(tmp, null, null);
4249
                }
4250
            }
4251
            return tmp;
4252
        }
4253
        /**
4254
         * @param partialname
4255
         * @param tnb
4256
         * @return
4257
         */
4258
        private Taxon buildSpecies(String partialname, INonViralName tnb) {
4259
            if (genusName !=null) {
4260
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4261
            }
4262
            if (subgenusName !=null) {
4263
                tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4264
            }
4265
            tnb.setSpecificEpithet(partialname.toLowerCase());
4266
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4267
            if(tmp ==null){
4268
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4269
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4270
                    tmp.setSec(state2.getConfig().getSecundum());
4271
                }
4272
                //                tmp.setSec(refMods);
4273
                //sourceHandler.addSource(refMods, tmp);
4274
                if (subgenus !=null) {
4275
                    classification.addParentChild(subgenus, tmp, null, null);
4276
                    higherRank=Rank.SUBGENUS();
4277
                    higherTaxa=subgenus;
4278
                } else {
4279
                    if (genus !=null) {
4280
                        classification.addParentChild(genus, tmp, null, null);
4281
                        higherRank=Rank.GENUS();
4282
                        higherTaxa=genus;
4283
                    }
4284
                    else{
4285
                        //System.out.println("ADDCHILDTAXON SPECIES "+tmp);
4286
                        classification.addChildTaxon(tmp, null, null);
4287
                    }
4288
                }
4289
            }
4290
            return tmp;
4291
        }
4292
        /**
4293
         * @param partialname
4294
         * @param tnb
4295
         * @return
4296
         */
4297
        private Taxon buildSubgenus(String partialname, INonViralName tnb) {
4298
            tnb.setInfraGenericEpithet(partialname);
4299
            if (genusName !=null) {
4300
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4301
            }
4302
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4303
            if(tmp ==null){
4304
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4305
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4306
                    tmp.setSec(state2.getConfig().getSecundum());
4307
                }
4308
                //                tmp.setSec(refMods);
4309
                //sourceHandler.addSource(refMods, tmp);
4310
                if(genus != null) {
4311
                    classification.addParentChild(genus, tmp, null, null);
4312
                    higherRank=Rank.GENUS();
4313
                    higherTaxa=genus;
4314
                } else{
4315
                    //System.out.println("ADDCHILDTAXON SUBGENUS "+tmp);
4316
                    classification.addChildTaxon(tmp, null, null);
4317
                }
4318
            }
4319
            return tmp;
4320
        }
4321
        /**
4322
         * @param partialname
4323
         * @param tnb
4324
         * @return
4325
         */
4326
        private Taxon buildGenus(String partialname, INonViralName tnb) {
4327
            Taxon tmp;
4328
            tnb.setGenusOrUninomial(partialname);
4329

    
4330

    
4331
            tmp = findMatchingTaxon(tnb,refMods);
4332
            if(tmp ==null){
4333
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4334
                if(!state2.getConfig().doKeepOriginalSecundum())
4335
                 {
4336
                    tmp.setSec(state2.getConfig().getSecundum());
4337
                //                tmp.setSec(refMods);
4338
                //sourceHandler.addSource(refMods, tmp);
4339
                }
4340

    
4341
                if(subtribe != null) {
4342
                    classification.addParentChild(subtribe, tmp, null, null);
4343
                    higherRank=Rank.SUBTRIBE();
4344
                    higherTaxa=subtribe;
4345
                } else{
4346
                    if(tribe !=null) {
4347
                        classification.addParentChild(tribe, tmp, null, null);
4348
                        higherRank=Rank.TRIBE();
4349
                        higherTaxa=tribe;
4350
                    } else{
4351
                        if(subfamily !=null) {
4352
                            classification.addParentChild(subfamily, tmp, null, null);
4353
                            higherRank=Rank.SUBFAMILY();
4354
                            higherTaxa=subfamily;
4355
                        } else
4356
                            if(family !=null) {
4357
                                classification.addParentChild(family, tmp, null, null);
4358
                                higherRank=Rank.FAMILY();
4359
                                higherTaxa=family;
4360
                            }
4361
                            else{
4362
                                //System.out.println("ADDCHILDTAXON GENUS "+tmp);
4363
                                classification.addChildTaxon(tmp, null, null);
4364
                            }
4365
                    }
4366
                }
4367
            }
4368
            return tmp;
4369
        }
4370

    
4371
        /**
4372
         * @param tnb
4373
         * @return
4374
         */
4375
        private Taxon buildSubtribe(INonViralName tnb) {
4376
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4377
            if(tmp==null){
4378
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4379
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4380
                    tmp.setSec(state2.getConfig().getSecundum());
4381
                }
4382
                //                tmp.setSec(refMods);
4383
                //sourceHandler.addSource(refMods, tmp);
4384
                if(tribe != null) {
4385
                    classification.addParentChild(tribe, tmp, null, null);
4386
                    higherRank=Rank.TRIBE();
4387
                    higherTaxa=tribe;
4388
                } else{
4389
                    //System.out.println("ADDCHILDTAXON SUBTRIBE "+tmp);
4390
                    classification.addChildTaxon(tmp, null, null);
4391
                }
4392
            }
4393
            return tmp;
4394
        }
4395
        /**
4396
         * @param tnb
4397
         * @return
4398
         */
4399
        private Taxon buildTribe(INonViralName tnb) {
4400
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4401
            if(tmp==null){
4402
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4403
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4404
                    tmp.setSec(state2.getConfig().getSecundum());
4405
                }
4406
                //                tmp.setSec(refMods);
4407
                //sourceHandler.addSource(refMods, tmp);
4408
                if (subfamily !=null) {
4409
                    classification.addParentChild(subfamily, tmp, null, null);
4410
                    higherRank=Rank.SUBFAMILY();
4411
                    higherTaxa=subfamily;
4412
                } else {
4413
                    if(family != null) {
4414
                        classification.addParentChild(family, tmp, null, null);
4415
                        higherRank=Rank.FAMILY();
4416
                        higherTaxa=family;
4417
                    }
4418
                    else{
4419
                        //System.out.println("ADDCHILDTAXON TRIBE "+tmp);
4420
                        classification.addChildTaxon(tmp, null, null);
4421
                    }
4422
                }
4423
            }
4424
            return tmp;
4425
        }
4426

    
4427
        /**
4428
         * @param identifier2
4429
         * @return
4430
         */
4431
        @SuppressWarnings("rawtypes")
4432
        private Taxon getTaxonByLSID(String identifier) {
4433
            //logger.info("getTaxonByLSID");
4434
            //            boolean lsidok=false;
4435
            String id = identifier.split("__")[0];
4436
            //            String source = identifier.split("__")[1];
4437
            LSID lsid = null;
4438
            if (id.indexOf("lsid")>-1){
4439
                try {
4440
                    lsid = new LSID(id);
4441
                    //                    lsidok=true;
4442
                } catch (MalformedLSIDException e) {
4443
                    logger.warn("Malformed LSID");
4444
                }
4445
            }
4446
            if (lsid !=null){
4447
                List<Taxon> taxa = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
4448
                LSID currentlsid=null;
4449
                for (Taxon t:taxa){
4450
                    currentlsid = t.getLsid();
4451
                    if (currentlsid !=null){
4452
                        if (currentlsid.getLsid().equals(lsid.getLsid())){
4453
                            try{
4454
                                return t;
4455
                            }
4456
                            catch(Exception e){logger.warn("Exception occurred while comparing LSIDs "+e );}
4457
                        }
4458
                    }
4459
                }
4460
            }
4461
            return null;
4462
        }
4463
        /**
4464
         * @param author2
4465
         * @return
4466
         */
4467
        @SuppressWarnings("rawtypes")
4468
        private Person findOrCreateAuthor(String author2) {
4469
            //logger.info("findOrCreateAuthor");
4470
            List<UuidAndTitleCache<Person>> hiberPersons = importer.getAgentService().getPersonUuidAndTitleCache();
4471
            for (UuidAndTitleCache<Person> hibernateP:hiberPersons){
4472
                if(hibernateP.getTitleCache().equals(author2)) {
4473
                    AgentBase existing = importer.getAgentService().find(hibernateP.getUuid());
4474
                    return CdmBase.deproxy(existing, Person.class);
4475
                }
4476
            }
4477
            Person p = Person.NewInstance();
4478
            p.setTitleCache(author2,true);
4479
            importer.getAgentService().saveOrUpdate(p);
4480
            return CdmBase.deproxy(p, Person.class);
4481
        }
4482
        /**
4483
         * @param author the author to set
4484
         */
4485
        public void setAuthor(String author) {
4486
            this.author = author;
4487
        }
4488

    
4489
        /**
4490
         * @return the higherTaxa
4491
         */
4492
        public Taxon getHigherTaxa() {
4493
            return higherTaxa;
4494
        }
4495
        /**
4496
         * @param higherTaxa the higherTaxa to set
4497
         */
4498
        public void setHigherTaxa(Taxon higherTaxa) {
4499
            this.higherTaxa = higherTaxa;
4500
        }
4501
        /**
4502
         * @return the higherRank
4503
         */
4504
        public Rank getHigherRank() {
4505
            return higherRank;
4506
        }
4507
        /**
4508
         * @param higherRank the higherRank to set
4509
         */
4510
        public void setHigherRank(Rank higherRank) {
4511
            this.higherRank = higherRank;
4512
        }
4513
        public String getName(){
4514
            if (newName.isEmpty()) {
4515
                return originalName;
4516
            } else {
4517
                return newName;
4518
            }
4519

    
4520
        }
4521
        /**
4522
         * @return the fullName
4523
         */
4524
        public String getOriginalName() {
4525
            return originalName;
4526
        }
4527
        /**
4528
         * @param fullName the fullName to set
4529
         */
4530
        public void setOriginalName(String fullName) {
4531
            this.originalName = fullName;
4532
        }
4533
        /**
4534
         * @return the newName
4535
         */
4536
        public String getNewName() {
4537
            return newName;
4538
        }
4539
        /**
4540
         * @param newName the newName to set
4541
         */
4542
        public void setNewName(String newName) {
4543
            this.newName = newName;
4544
        }
4545
        /**
4546
         * @return the rank
4547
         */
4548
        public Rank getRank() {
4549
            return rank;
4550
        }
4551
        /**
4552
         * @param rank the rank to set
4553
         */
4554
        public void setRank(Rank rank) {
4555
            this.rank = rank;
4556
        }
4557
        /**
4558
         * @return the idenfitiger
4559
         */
4560
        public String getIdentifier() {
4561
            return identifier;
4562
        }
4563
        /**
4564
         * @param idenfitiger the idenfitiger to set
4565
         */
4566
        public void setIdentifier(String identifier) {
4567
            this.identifier = identifier;
4568
        }
4569
        /**
4570
         * @return the status
4571
         */
4572
        public String getStatus() {
4573
            if (status == null) {
4574
                return "";
4575
            }
4576
            return status;
4577
        }
4578
        /**
4579
         * @param status the status to set
4580
         */
4581
        public void setStatus(String status) {
4582
            this.status = status;
4583
        }
4584
        /**
4585
         * @return the family
4586
         */
4587
        public Taxon getFamily() {
4588
            return family;
4589
        }
4590
        /**
4591
         * @param family the family to set
4592
         */
4593
        @SuppressWarnings("rawtypes")
4594
        public void setFamily(Taxon family) {
4595
            this.family = family;
4596
            familyName = CdmBase.deproxy(family.getName());
4597
        }
4598
        /**
4599
         * @return the subfamily
4600
         */
4601
        public Taxon getSubfamily() {
4602
            return subfamily;
4603
        }
4604
        /**
4605
         * @param subfamily the subfamily to set
4606
         */
4607
        @SuppressWarnings("rawtypes")
4608
        public void setSubfamily(Taxon subfamily) {
4609
            this.subfamily = subfamily;
4610
            subfamilyName = CdmBase.deproxy(subfamily.getName());
4611
        }
4612
        /**
4613
         * @return the tribe
4614
         */
4615
        public Taxon getTribe() {
4616
            return tribe;
4617
        }
4618
        /**
4619
         * @param tribe the tribe to set
4620
         */
4621
        @SuppressWarnings("rawtypes")
4622
        public void setTribe(Taxon tribe) {
4623
            this.tribe = tribe;
4624
            tribeName = CdmBase.deproxy(tribe.getName());
4625
        }
4626
        /**
4627
         * @return the subtribe
4628
         */
4629
        public Taxon getSubtribe() {
4630
            return subtribe;
4631
        }
4632
        /**
4633
         * @param subtribe the subtribe to set
4634
         */
4635
        @SuppressWarnings("rawtypes")
4636
        public void setSubtribe(Taxon subtribe) {
4637
            this.subtribe = subtribe;
4638
            subtribeName =CdmBase.deproxy(subtribe.getName());
4639
        }
4640
        /**
4641
         * @return the genus
4642
         */
4643
        public Taxon getGenus() {
4644
            return genus;
4645
        }
4646
        /**
4647
         * @param genus the genus to set
4648
         */
4649
        @SuppressWarnings("rawtypes")
4650
        public void setGenus(Taxon genus) {
4651
            if (genus != null){
4652
	        	this.genus = genus;
4653
	            genusName = CdmBase.deproxy(genus.getName());
4654
            }
4655
        }
4656
        /**
4657
         * @return the subgenus
4658
         */
4659
        public Taxon getSubgenus() {
4660
            return subgenus;
4661
        }
4662
        /**
4663
         * @param subgenus the subgenus to set
4664
         */
4665
        @SuppressWarnings("rawtypes")
4666
        public void setSubgenus(Taxon subgenus) {
4667
            this.subgenus = subgenus;
4668
            subgenusName = CdmBase.deproxy(subgenus.getName());
4669
        }
4670
        /**
4671
         * @return the species
4672
         */
4673
        public Taxon getSpecies() {
4674
            return species;
4675
        }
4676
        /**
4677
         * @param species the species to set
4678
         */
4679
        public void setSpecies(Taxon species) {
4680
        	if (species != null){
4681
	            this.species = species;
4682
	            speciesName = CdmBase.deproxy(species.getName());
4683
        	}
4684
        }
4685
        /**
4686
         * @return the subspecies
4687
         */
4688
        public Taxon getSubspecies() {
4689
            return subspecies;
4690
        }
4691
        /**
4692
         * @param subspecies the subspecies to set
4693
         */
4694
        @SuppressWarnings("rawtypes")
4695
        public void setSubspecies(Taxon subspecies) {
4696
            this.subspecies = subspecies;
4697
            subspeciesName = CdmBase.deproxy(subspecies.getName());
4698

    
4699
        }
4700

    
4701

    
4702

    
4703
    }
4704

    
4705

    
4706
    /**
4707
     * @param status
4708
     */
4709
    private void addProblematicStatusToFile(String status) {
4710
        try{
4711
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "StatusUnknown_"+classification.getTitleCache()+".txt",true);
4712
            BufferedWriter out = new BufferedWriter(fstream);
4713
            out.write(status+"\n");
4714
            //Close the output stream
4715
            out.close();
4716
        }catch (Exception e){//Catch exception if any
4717
            System.err.println("Error: " + e.getMessage());
4718
        }
4719

    
4720
    }
4721

    
4722

    
4723

    
4724
    /**
4725
     * @param tnb
4726
     * @return
4727
     */
4728
    private Taxon findMatchingTaxon(INonViralName tnb, Reference refMods) {
4729
        logger.info("findMatchingTaxon");
4730
        Taxon tmp=null;
4731

    
4732
        refMods=CdmBase.deproxy(refMods, Reference.class);
4733
        boolean insertAsExisting =false;
4734
        List<Taxon> existingTaxa = new ArrayList<Taxon>();
4735
        try {
4736
            existingTaxa = getMatchingTaxa(tnb);
4737
        } catch (Exception e1) {
4738
            // TODO Auto-generated catch block
4739
            e1.printStackTrace();
4740
        }
4741
        double similarityScore=0.0;
4742
        double similarityAuthor=-1;
4743
        String author1="";
4744
        String author2="";
4745
        String t1="";
4746
        String t2="";
4747
        for (Taxon bestMatchingTaxon : existingTaxa){
4748
            if (!existingTaxa.isEmpty() && state2.getConfig().isInteractWithUser() && !insertAsExisting) {
4749
                //                System.out.println("tnb "+tnb.getTitleCache());
4750
                //                System.out.println("ext "+bestMatchingTaxon.getTitleCache());
4751
                try {
4752
                    if(tnb.getAuthorshipCache()!=null) {
4753
                        author1=tnb.getAuthorshipCache();
4754
                    }
4755
                } catch (Exception e) {
4756
                    // TODO Auto-generated catch block
4757
                    e.printStackTrace();
4758
                }
4759
                try {
4760
                    if(bestMatchingTaxon.getName().getAuthorshipCache()!=null) {
4761
                        author2=bestMatchingTaxon.getName().getAuthorshipCache();
4762
                    }
4763
                } catch (Exception e) {
4764
                    // TODO Auto-generated catch block
4765
                    e.printStackTrace();
4766
                }
4767
                try {
4768
                    t1=tnb.getTitleCache().split("sec.")[0].trim();
4769
                    if (author1!=null && !StringUtils.isEmpty(author1)) {
4770
                        t1=t1.split(Pattern.quote(author1))[0];
4771
                    }
4772
                } catch (Exception e) {
4773
                    // TODO Auto-generated catch block
4774
                    e.printStackTrace();
4775
                }
4776
                try {
4777
                    t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
4778
                    if (author2!=null && !StringUtils.isEmpty(author2)) {
4779
                        t2=t2.split(Pattern.quote(author2))[0];
4780
                    }
4781
                } catch (Exception e) {
4782
                    // TODO Auto-generated catch block
4783
                    e.printStackTrace();
4784
                }
4785
                similarityScore=similarity(t1.trim(), t2.trim());
4786
                //                System.out.println("taxascore: "+similarityScore);
4787
                similarityAuthor=similarity(author1.trim(), author2.trim());
4788
                //                System.out.println("authorscore: "+similarityAuthor);
4789
                insertAsExisting = compareAndCheckTaxon(tnb, refMods, similarityScore, bestMatchingTaxon,similarityAuthor);
4790
            }
4791
            if(insertAsExisting) {
4792
                //System.out.println("KEEP "+bestMatchingTaxon.toString());
4793
                tmp=bestMatchingTaxon;
4794
                sourceHandler.addSource(refMods, tmp);
4795
                return tmp;
4796
            }
4797
        }
4798
        return tmp;
4799
    }
4800

    
4801

    
4802
    /**
4803
     * @param tnb
4804
     * @param refMods
4805
     * @param similarityScore
4806
     * @param bestMatchingTaxon
4807
     * @param similarityAuthor
4808
     * @return
4809
     */
4810
    private boolean compareAndCheckTaxon(INonViralName tnb, Reference refMods, double similarityScore,
4811
            Taxon bestMatchingTaxon, double similarityAuthor) {
4812
        //logger.info("compareAndCheckTaxon");
4813
        boolean insertAsExisting;
4814
        //        if (tnb.getTitleCache().split("sec.")[0].equalsIgnoreCase("Chenopodium") && bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium album")>-1) {
4815
        //            insertAsExisting=false;
4816
        //        } else{
4817
        //a small hack/automatisation for Chenopodium only
4818
        if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase("Chenopodium") &&
4819
                bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium L.")>-1) {
4820
            insertAsExisting=true;
4821
        } else {
4822
            insertAsExisting=askIfReuseBestMatchingTaxon(tnb, bestMatchingTaxon, refMods, similarityScore,similarityAuthor);
4823
        }
4824
        //        }
4825

    
4826
        logDecision(tnb, bestMatchingTaxon, insertAsExisting, refMods);
4827
        return insertAsExisting;
4828
    }
4829

    
4830
    /**
4831
     * @return
4832
     */
4833
    @SuppressWarnings("rawtypes")
4834
    private List<Taxon> getMatchingTaxa(ITaxonNameBase tnb) {
4835
        //logger.info("getMatchingTaxon");
4836
    	if (tnb.getTitleCache() == null){
4837
    		tnb.setTitleCache(tnb.toString(), tnb.isProtectedTitleCache());
4838
    	}
4839

    
4840
        Pager<TaxonBase> pager=importer.getTaxonService().findByTitle(TaxonBase.class, tnb.getTitleCache().split("sec.")[0].trim(), MatchMode.BEGINNING, null, null, null, null, null);
4841
        List<TaxonBase>records = pager.getRecords();
4842

    
4843
        List<Taxon> existingTaxons = new ArrayList<Taxon>();
4844
        for (TaxonBase r:records){
4845
            try{
4846
                Taxon bestMatchingTaxon = (Taxon)r;
4847
                //                System.out.println("best: "+bestMatchingTaxon.getTitleCache());
4848
                if(compareTaxonNameLength(bestMatchingTaxon.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4849
                    existingTaxons.add(bestMatchingTaxon);
4850
                }
4851
            }catch(ClassCastException e){logger.warn("classcast exception, might be a synonym, ignore it");}
4852
        }
4853
        Taxon bmt = importer.getTaxonService().findBestMatchingTaxon(tnb.getTitleCache());
4854
        if (!existingTaxons.contains(bmt) && bmt!=null) {
4855
            if(compareTaxonNameLength(bmt.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4856
                existingTaxons.add(bmt);
4857
            }
4858
        }
4859
        return existingTaxons;
4860
    }
4861

    
4862
    /**
4863
     * Check if the found Taxon can reasonnably be the same
4864
     * example: with and without author should match, but the subspecies should not be suggested for a genus
4865
     * */
4866
    private boolean compareTaxonNameLength(String f, String o){
4867
        //logger.info("compareTaxonNameLength");
4868
        boolean lengthOk=false;
4869
        int sizeF = f.length();
4870
        int sizeO = o.length();
4871
        if (sizeO>=sizeF) {
4872
            lengthOk=true;
4873
        }
4874
        if(sizeF>sizeO) {
4875
            if (sizeF-sizeO>10) {
4876
                lengthOk=false;
4877
            } else {
4878
                lengthOk=true;
4879
            }
4880
        }
4881

    
4882
        //        System.out.println(lengthOk+": compare "+f+" ("+f.length()+") and "+o+" ("+o.length()+")");
4883
        return lengthOk;
4884
    }
4885

    
4886
    private double similarity(String s1, String s2) {
4887
        //logger.info("similarity");
4888
        //System.out.println("similarity *"+s1+"* vs. *"+s2+"*");
4889
        if(!StringUtils.isEmpty(s1) && !StringUtils.isEmpty(s2)){
4890
            String l1=s1.toLowerCase().trim();
4891
            String l2=s2.toLowerCase().trim();
4892
            if (l1.length() < l2.length()) { // s1 should always be bigger
4893
                String swap = l1; l1 = l2; l2 = swap;
4894
            }
4895
            int bigLen = l1.length();
4896
            if (bigLen == 0) { return 1.0; /* both strings are zero length */ }
4897
            return (bigLen - computeEditDistance(l1, l2)) / (double) bigLen;
4898
        }
4899
        else{
4900
            if(s1!=null && s2!=null){
4901
                if (s1.equalsIgnoreCase(s2)) {
4902
                    return 1;
4903
                }
4904
            }
4905
            return -1;
4906
        }
4907
    }
4908

    
4909
    private int computeEditDistance(String s1, String s2) {
4910
        //logger.info("computeEditDistance");
4911
        int[] costs = new int[s2.length() + 1];
4912
        for (int i = 0; i <= s1.length(); i++) {
4913
            int lastValue = i;
4914
            for (int j = 0; j <= s2.length(); j++) {
4915
                if (i == 0) {
4916
                    costs[j] = j;
4917
                } else {
4918
                    if (j > 0) {
4919
                        int newValue = costs[j - 1];
4920
                        if (s1.charAt(i - 1) != s2.charAt(j - 1)) {
4921
                            newValue = Math.min(Math.min(newValue, lastValue),
4922
                                    costs[j]) + 1;
4923
                        }
4924
                        costs[j - 1] = lastValue;
4925
                        lastValue = newValue;
4926
                    }
4927
                }
4928
            }
4929
            if (i > 0) {
4930
                costs[s2.length()] = lastValue;
4931
            }
4932
        }
4933
        return costs[s2.length()];
4934
    }
4935

    
4936
    // Parent taxa resolved by the handle*Hierarchy methods, keyed by the rank under which each parent was stored.
    Map<Rank, Taxon> hierarchy = new HashMap<Rank, Taxon>();
4937
    /**
4938
     * @param taxonNameBase
4939
     */
4940
    @SuppressWarnings("rawtypes")
4941
    public void lookForParentNode(INonViralName taxonNameBase, Taxon tax, Reference ref, MyName myName) {
4942
        logger.info("lookForParentNode "+taxonNameBase.getTitleCache()+" for "+myName.toString());
4943
        //System.out.println("LOOK FOR PARENT NODE "+taxonnamebase.toString()+"; "+tax.toString()+"; "+taxonnamebase.getRank());
4944
        INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
4945
        if (taxonNameBase.getRank().equals(Rank.FORM())){
4946
            handleFormHierarchy(ref, myName, parser);
4947
        }
4948
        else if (taxonNameBase.getRank().equals(Rank.VARIETY())){
4949
            handleVarietyHierarchy(ref, myName, parser);
4950
        }
4951
        else if (taxonNameBase.getRank().equals(Rank.SUBSPECIES())){
4952
            handleSubSpeciesHierarchy(ref, myName, parser);
4953
        }
4954
        else if (taxonNameBase.getRank().equals(Rank.SPECIES())){
4955
            handleSpeciesHierarchy(ref, myName, parser);
4956
        }
4957
        else if (taxonNameBase.getRank().equals(Rank.SUBGENUS())){
4958
            handleSubgenusHierarchy(ref, myName, parser);
4959
        }
4960

    
4961
        if (taxonNameBase.getRank().equals(Rank.GENUS())){
4962
            handleGenusHierarchy(ref, myName, parser);
4963
        }
4964
        if (taxonNameBase.getRank().equals(Rank.SUBTRIBE())){
4965
            handleSubtribeHierarchy(ref, myName, parser);
4966
        }
4967
        if (taxonNameBase.getRank().equals(Rank.TRIBE())){
4968
            handleTribeHierarchy(ref, myName, parser);
4969
        }
4970

    
4971
        if (taxonNameBase.getRank().equals(Rank.SUBFAMILY())){
4972
            handleSubfamilyHierarchy(ref, myName, parser);
4973
        }
4974
    }
4975

    
4976
    /**
4977
     * @param ref
4978
     * @param myName
4979
     * @param parser
4980
     */
4981
    private void handleSubfamilyHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
4982
        System.out.println("handleSubfamilyHierarchy");
4983
        String parentStr = myName.getFamilyStr();
4984
        Rank r = Rank.FAMILY();
4985
        if(parentStr!=null){
4986

    
4987
            Taxon parent = null;
4988
            Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, parentStr, MatchMode.BEGINNING, null, null, null, null, null);
4989
            for(TaxonBase tb:taxontest.getRecords()){
4990
                try {
4991
                    if (tb.getName().getRank().equals(r)) {
4992
                        parent=CdmBase.deproxy(tb, Taxon.class);
4993
                    }
4994
                    break;
4995
                } catch (Exception e) {
4996
                    // TODO Auto-generated catch block
4997
                    e.printStackTrace();
4998
                }
4999
            }
5000
            if(parent == null) {
5001
                INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5002
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5003
                if(tmp ==null)
5004
                {
5005
                    parent=Taxon.NewInstance(parentNameName, ref);
5006
                    importer.getTaxonService().save(parent);
5007
                    parent = CdmBase.deproxy(parent, Taxon.class);
5008
                } else {
5009
                    parent=tmp;
5010
                }
5011
                lookForParentNode(parentNameName, parent, ref,myName);
5012

    
5013
            }
5014
            hierarchy.put(r,parent);
5015
        }
5016
    }
5017

    
5018
    /**
5019
     * @param ref
5020
     * @param myName
5021
     * @param parser
5022
     */
5023
    private void handleTribeHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5024
        String parentStr = myName.getSubfamilyStr();
5025
        Rank r = Rank.SUBFAMILY();
5026
        if (parentStr == null){
5027
            parentStr = myName.getFamilyStr();
5028
            r = Rank.FAMILY();
5029
        }
5030
        if(parentStr!=null){
5031
            INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5032
            Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5033
            //                    importer.getTaxonService().save(parent);
5034
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5035

    
5036
            boolean parentDoesNotExists = true;
5037
            for (TaxonNode p : classification.getAllNodes()){
5038
                if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5039
                    parentDoesNotExists = false;
5040
                    parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5041
                    break;
5042
                }
5043
            }
5044
            //                if(parentDoesNotExists) {
5045
            //                    importer.getTaxonService().save(parent);
5046
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5047
            //                    lookForParentNode(parentNameName, parent, ref,myName);
5048
            //                }
5049
            if(parentDoesNotExists) {
5050
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5051
                if(tmp ==null)
5052
                {
5053
                    parent=Taxon.NewInstance(parentNameName, ref);
5054
                    importer.getTaxonService().save(parent);
5055
                    parent = CdmBase.deproxy(parent, Taxon.class);
5056
                } else {
5057
                    parent=tmp;
5058
                }
5059
                lookForParentNode(parentNameName, parent, ref,myName);
5060

    
5061
            }
5062
            hierarchy.put(r,parent);
5063
        }
5064
    }
5065

    
5066
    /**
5067
     * @param ref
5068
     * @param myName
5069
     * @param parser
5070
     */
5071
    private void handleSubtribeHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5072
        String parentStr = myName.getTribeStr();
5073
        Rank r = Rank.TRIBE();
5074
        if (parentStr == null){
5075
            parentStr = myName.getSubfamilyStr();
5076
            r = Rank.SUBFAMILY();
5077
        }
5078
        if (parentStr == null){
5079
            parentStr = myName.getFamilyStr();
5080
            r = Rank.FAMILY();
5081
        }
5082
        if(parentStr!=null){
5083
            INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5084
            Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5085
            //                    importer.getTaxonService().save(parent);
5086
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5087

    
5088
            boolean parentDoesNotExists = true;
5089
            for (TaxonNode p : classification.getAllNodes()){
5090
                if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5091
                    parentDoesNotExists = false;
5092
                    parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5093

    
5094
                    break;
5095
                }
5096
            }
5097
            //                if(parentDoesNotExists) {
5098
            //                    importer.getTaxonService().save(parent);
5099
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5100
            //                    lookForParentNode(parentNameName, parent, ref,myName);
5101
            //                }
5102
            if(parentDoesNotExists) {
5103
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5104
                if(tmp ==null)
5105
                {
5106
                    parent=Taxon.NewInstance(parentNameName, ref);
5107
                    importer.getTaxonService().save(parent);
5108
                    parent = CdmBase.deproxy(parent, Taxon.class);
5109
                } else {
5110
                    parent=tmp;
5111
                }
5112
                lookForParentNode(parentNameName, parent, ref,myName);
5113

    
5114
            }
5115
            hierarchy.put(r,parent);
5116
        }
5117
    }
5118

    
5119
    /**
5120
     * @param ref
5121
     * @param myName
5122
     * @param parser
5123
     */
5124
    private void handleGenusHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5125
        String parentStr = myName.getSubtribeStr();
5126
        Rank r = Rank.SUBTRIBE();
5127
        if (parentStr == null){
5128
            parentStr = myName.getTribeStr();
5129
            r = Rank.TRIBE();
5130
        }
5131
        if (parentStr == null){
5132
            parentStr = myName.getSubfamilyStr();
5133
            r = Rank.SUBFAMILY();
5134
        }
5135
        if (parentStr == null){
5136
            parentStr = myName.getFamilyStr();
5137
            r = Rank.FAMILY();
5138
        }
5139
        if(parentStr!=null){
5140
            INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5141
            Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5142
            //                    importer.getTaxonService().save(parent);
5143
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5144

    
5145
            boolean parentDoesNotExist = true;
5146
            for (TaxonNode p : classification.getAllNodes()){
5147
                if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5148
                    //                        System.out.println(p.getTaxon().getUuid());
5149
                    //                        System.out.println(parent.getUuid());
5150
                    parentDoesNotExist = false;
5151
                    parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5152
                    break;
5153
                }
5154
            }
5155
            //                if(parentDoesNotExists) {
5156
            //                    importer.getTaxonService().save(parent);
5157
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5158
            //                    lookForParentNode(parentNameName, parent, ref,myName);
5159
            //                }
5160
            if(parentDoesNotExist) {
5161
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5162
                if(tmp ==null){
5163

    
5164
                    parent=Taxon.NewInstance(parentNameName, ref);
5165
                    importer.getTaxonService().save(parent);
5166
                    parent = CdmBase.deproxy(parent, Taxon.class);
5167
                } else {
5168
                    parent=tmp;
5169
                }
5170
                lookForParentNode(parentNameName, parent, ref,myName);
5171

    
5172
            }
5173
            hierarchy.put(r,parent);
5174
        }
5175
    }
5176

    
5177
    /**
5178
     * @param ref
5179
     * @param myName
5180
     * @param parser
5181
     */
5182
    private void handleSubgenusHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5183
        String parentStr = myName.getGenusStr();
5184
        Rank r = Rank.GENUS();
5185

    
5186
        if(parentStr==null){
5187
            parentStr = myName.getSubtribeStr();
5188
            r = Rank.SUBTRIBE();
5189
        }
5190
        if (parentStr == null){
5191
            parentStr = myName.getTribeStr();
5192
            r = Rank.TRIBE();
5193
        }
5194
        if (parentStr == null){
5195
            parentStr = myName.getSubfamilyStr();
5196
            r = Rank.SUBFAMILY();
5197
        }
5198
        if (parentStr == null){
5199
            parentStr = myName.getFamilyStr();
5200
            r = Rank.FAMILY();
5201
        }
5202
        if(parentStr!=null){
5203
            INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5204
            Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5205
            //                    importer.getTaxonService().save(parent);
5206
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5207

    
5208
            boolean parentDoesNotExists = true;
5209
            for (TaxonNode p : classification.getAllNodes()){
5210
                if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5211
                    //                        System.out.println(p.getTaxon().getUuid());
5212
                    //                        System.out.println(parent.getUuid());
5213
                    parentDoesNotExists = false;
5214
                    parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5215
                    break;
5216
                }
5217
            }
5218
            //                if(parentDoesNotExists) {
5219
            //                    importer.getTaxonService().save(parent);
5220
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5221
            //                    lookForParentNode(parentNameName, parent, ref,myName);
5222
            //                }
5223
            if(parentDoesNotExists) {
5224
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5225
                if(tmp ==null)
5226
                {
5227
                    parent=Taxon.NewInstance(parentNameName, ref);
5228
                    importer.getTaxonService().save(parent);
5229
                    parent = CdmBase.deproxy(parent, Taxon.class);
5230
                } else {
5231
                    parent=tmp;
5232
                }
5233
                lookForParentNode(parentNameName, parent, ref,myName);
5234

    
5235
            }
5236
            hierarchy.put(r,parent);
5237
        }
5238
    }
5239

    
5240
    /**
5241
     * @param ref
5242
     * @param myName
5243
     * @param parser
5244
     */
5245
    private void handleSpeciesHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5246
        String parentStr = myName.getSubgenusStr();
5247
        Rank r = Rank.SUBGENUS();
5248

    
5249
        if(parentStr==null){
5250
            parentStr = myName.getGenusStr();
5251
            r = Rank.GENUS();
5252
        }
5253

    
5254
        if(parentStr==null){
5255
            parentStr = myName.getSubtribeStr();
5256
            r = Rank.SUBTRIBE();
5257
        }
5258
        if (parentStr == null){
5259
            parentStr = myName.getTribeStr();
5260
            r = Rank.TRIBE();
5261
        }
5262
        if (parentStr == null){
5263
            parentStr = myName.getSubfamilyStr();
5264
            r = Rank.SUBFAMILY();
5265
        }
5266
        if (parentStr == null){
5267
            parentStr = myName.getFamilyStr();
5268
            r = Rank.FAMILY();
5269
        }
5270
        if(parentStr!=null){
5271
            Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5272
            //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5273
            hierarchy.put(r,parent);
5274
        }
5275
    }
5276

    
5277
    /**
5278
     * @param ref
5279
     * @param myName
5280
     * @param parser
5281
     */
5282
    private void handleSubSpeciesHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5283
        String parentStr = myName.getSpeciesStr();
5284
        Rank r = Rank.SPECIES();
5285

    
5286

    
5287
        if(parentStr==null){
5288
            parentStr = myName.getSubgenusStr();
5289
            r = Rank.SUBGENUS();
5290
        }
5291

    
5292
        if(parentStr==null){
5293
            parentStr = myName.getGenusStr();
5294
            r = Rank.GENUS();
5295
        }
5296

    
5297
        if(parentStr==null){
5298
            parentStr = myName.getSubtribeStr();
5299
            r = Rank.SUBTRIBE();
5300
        }
5301
        if (parentStr == null){
5302
            parentStr = myName.getTribeStr();
5303
            r = Rank.TRIBE();
5304
        }
5305
        if (parentStr == null){
5306
            parentStr = myName.getSubfamilyStr();
5307
            r = Rank.SUBFAMILY();
5308
        }
5309
        if (parentStr == null){
5310
            parentStr = myName.getFamilyStr();
5311
            r = Rank.FAMILY();
5312
        }
5313
        if(parentStr!=null){
5314
            Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5315
            //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5316
            hierarchy.put(r,parent);
5317
        }
5318
    }
5319

    
5320

    
5321
    /**
5322
     * @param ref
5323
     * @param myName
5324
     * @param parser
5325
     */
5326
    private void handleFormHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5327
        String parentStr = myName.getSubspeciesStr();
5328
        Rank r = Rank.SUBSPECIES();
5329

    
5330

    
5331
        if(parentStr==null){
5332
            parentStr = myName.getSpeciesStr();
5333
            r = Rank.SPECIES();
5334
        }
5335

    
5336
        if(parentStr==null){
5337
            parentStr = myName.getSubgenusStr();
5338
            r = Rank.SUBGENUS();
5339
        }
5340

    
5341
        if(parentStr==null){
5342
            parentStr = myName.getGenusStr();
5343
            r = Rank.GENUS();
5344
        }
5345

    
5346
        if(parentStr==null){
5347
            parentStr = myName.getSubtribeStr();
5348
            r = Rank.SUBTRIBE();
5349
        }
5350
        if (parentStr == null){
5351
            parentStr = myName.getTribeStr();
5352
            r = Rank.TRIBE();
5353
        }
5354
        if (parentStr == null){
5355
            parentStr = myName.getSubfamilyStr();
5356
            r = Rank.SUBFAMILY();
5357
        }
5358
        if (parentStr == null){
5359
            parentStr = myName.getFamilyStr();
5360
            r = Rank.FAMILY();
5361
        }
5362
        if(parentStr!=null){
5363
            Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5364
            //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5365
            hierarchy.put(r,parent);
5366
        }
5367
    }
5368

    
5369
    /**
5370
     * @param ref
5371
     * @param myName
5372
     * @param parser
5373
     */
5374
    private void handleVarietyHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5375
        String parentStr = myName.getSubspeciesStr();
5376
        Rank r = Rank.SUBSPECIES();
5377

    
5378
        if(parentStr==null){
5379
            parentStr = myName.getSpeciesStr();
5380
            r = Rank.SPECIES();
5381
        }
5382

    
5383
        if(parentStr==null){
5384
            parentStr = myName.getSubgenusStr();
5385
            r = Rank.SUBGENUS();
5386
        }
5387

    
5388
        if(parentStr==null){
5389
            parentStr = myName.getGenusStr();
5390
            r = Rank.GENUS();
5391
        }
5392

    
5393
        if(parentStr==null){
5394
            parentStr = myName.getSubtribeStr();
5395
            r = Rank.SUBTRIBE();
5396
        }
5397
        if (parentStr == null){
5398
            parentStr = myName.getTribeStr();
5399
            r = Rank.TRIBE();
5400
        }
5401
        if (parentStr == null){
5402
            parentStr = myName.getSubfamilyStr();
5403
            r = Rank.SUBFAMILY();
5404
        }
5405
        if (parentStr == null){
5406
            parentStr = myName.getFamilyStr();
5407
            r = Rank.FAMILY();
5408
        }
5409
        if(parentStr!=null){
5410
            Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5411
            //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5412
            hierarchy.put(r,parent);
5413
        }
5414
    }
5415

    
5416
    /**
5417
     * @param ref
5418
     * @param myName
5419
     * @param parser
5420
     * @param parentStr
5421
     * @param r
5422
     * @return
5423
     */
5424
    private Taxon handleParentName(Reference ref, MyName myName, INonViralNameParser<?> parser, String parentStr, Rank r) {
5425
        INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5426
        Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5427
        //                    importer.getTaxonService().save(parent);
5428
        //                    parent = CdmBase.deproxy(parent, Taxon.class);
5429

    
5430
        boolean parentDoesNotExists = true;
5431
        for (TaxonNode p : classification.getAllNodes()){
5432
            if(p.getTaxon().getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(parent.getTitleCache().split("sec.")[0].trim())) {
5433
                //                        System.out.println(p.getTaxon().getUuid());
5434
                //                        System.out.println(parent.getUuid());
5435
                parentDoesNotExists = false;
5436
                parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5437
                break;
5438
            }
5439
        }
5440
        if(parentDoesNotExists) {
5441
            Taxon tmp = findMatchingTaxon(parentNameName,ref);
5442
            //                    System.out.println("FOUND PARENT "+tmp.toString()+" for "+parentNameName.toString());
5443
            if(tmp ==null){
5444

    
5445
                parent=Taxon.NewInstance(parentNameName, ref);
5446
                importer.getTaxonService().save(parent);
5447

    
5448
            } else {
5449
                parent=tmp;
5450
            }
5451
            lookForParentNode(parentNameName, parent, ref,myName);
5452

    
5453
        }
5454
        return parent;
5455
    }
5456

    
5457
    private void addNameDifferenceToFile(String originalname, String atomisedname){
5458
        try{
5459
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NamesDifferent_"+classification.getTitleCache()+".txt",true);
5460
            BufferedWriter out = new BufferedWriter(fstream);
5461
            out.write(originalname+" (original) versus "+replaceNull(atomisedname)+" (atomised) \n");
5462
            //Close the output stream
5463
            out.close();
5464
        }catch (Exception e){//Catch exception if any
5465
            System.err.println("Error: " + e.getMessage());
5466
        }
5467
    }
5468
    /**
5469
     * @param name
5470
     * @param author
5471
     * @param nomenclaturalCode2
5472
     * @param rank
5473
     */
5474
    private void addProblemNameToFile(String name, String author, NomenclaturalCode nomenclaturalCode2, Rank rank) {
5475
        try{
5476
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NameNotParsed.txt",true);
5477
            BufferedWriter out = new BufferedWriter(fstream);
5478
            out.write(name+"\t"+replaceNull(author)+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\n");
5479
            //Close the output stream
5480
            out.close();
5481
        }catch (Exception e){//Catch exception if any
5482
            System.err.println("Error: " + e.getMessage());
5483
        }
5484
    }
5485

    
5486

    
5487
    /**
5488
     * @param tnb
5489
     * @param bestMatchingTaxon
5490
     * @param insertAsExisting
5491
     * @param refMods
5492
     */
5493
    private void logDecision(INonViralName tnb, Taxon bestMatchingTaxon, boolean insertAsExisting, Reference refMods) {
5494
        try{
5495
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "Decisions_"+classification.toString()+".txt", true);
5496
            BufferedWriter out = new BufferedWriter(fstream);
5497
            out.write(tnb.getTitleCache() + " sec. " + refMods + "\t" + bestMatchingTaxon.getTitleCache() + "\t" + insertAsExisting + "\n");
5498
            //Close the output stream
5499
            out.close();
5500
        }catch (Exception e){//Catch exception if any
5501
            System.err.println("Error: " + e.getMessage());
5502
        }
5503
    }
5504

    
5505

    
5506
    @SuppressWarnings("unused")
5507
    private String replaceNull(Object in){
5508
        if (in == null) {
5509
            return "";
5510
        }
5511
        if (in.getClass().equals(NomenclaturalCode.class)) {
5512
            return ((NomenclaturalCode)in).getTitleCache();
5513
        }
5514
        return in.toString();
5515
    }
5516

    
5517
    /**
5518
     * @param fullName
5519
     * @param nomenclaturalCode2
5520
     * @param rank
5521
     */
5522
    private void addProblemNameToFile(String type, String name, NomenclaturalCode nomenclaturalCode2, Rank rank, String problems) {
5523
        try{
5524
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NameNotParsed_"+classification.getTitleCache()+".txt",true);
5525
            BufferedWriter out = new BufferedWriter(fstream);
5526
            out.write(type+"\t"+name+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\t"+problems+"\n");
5527
            //Close the output stream
5528
            out.close();
5529
        }catch (Exception e){//Catch exception if any
5530
            System.err.println("Error: " + e.getMessage());
5531
        }
5532

    
5533
    }
5534

    
5535
}
5536

    
5537

    
5538

    
(8-8/9)