Project

General

Profile

Download (232 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
 * Copyright (C) 2013 EDIT
3
 * European Distributed Institute of Taxonomy
4
 * http://www.e-taxonomy.eu
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9
package eu.etaxonomy.cdm.io.taxonx2013;
10

    
11
import java.io.BufferedWriter;
12
import java.io.File;
13
import java.io.FileWriter;
14
import java.io.IOException;
15
import java.net.URI;
16
import java.util.ArrayList;
17
import java.util.Arrays;
18
import java.util.HashMap;
19
import java.util.List;
20
import java.util.Map;
21
import java.util.Set;
22
import java.util.UUID;
23
import java.util.regex.Matcher;
24
import java.util.regex.Pattern;
25

    
26
import javax.xml.transform.TransformerException;
27
import javax.xml.transform.TransformerFactoryConfigurationError;
28

    
29
import org.apache.commons.lang.StringUtils;
30
import org.apache.log4j.Logger;
31
import org.w3c.dom.Node;
32
import org.w3c.dom.NodeList;
33

    
34
import com.ibm.lsid.MalformedLSIDException;
35

    
36
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
37
import eu.etaxonomy.cdm.api.service.pager.Pager;
38
import eu.etaxonomy.cdm.model.agent.AgentBase;
39
import eu.etaxonomy.cdm.model.agent.Person;
40
import eu.etaxonomy.cdm.model.common.CdmBase;
41
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
42
import eu.etaxonomy.cdm.model.common.LSID;
43
import eu.etaxonomy.cdm.model.common.Language;
44
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
45
import eu.etaxonomy.cdm.model.description.Feature;
46
import eu.etaxonomy.cdm.model.description.FeatureNode;
47
import eu.etaxonomy.cdm.model.description.FeatureTree;
48
import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
49
import eu.etaxonomy.cdm.model.description.TaxonDescription;
50
import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
51
import eu.etaxonomy.cdm.model.description.TextData;
52
import eu.etaxonomy.cdm.model.name.INonViralName;
53
import eu.etaxonomy.cdm.model.name.ITaxonNameBase;
54
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
55
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
56
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
57
import eu.etaxonomy.cdm.model.name.Rank;
58
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
59
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
60
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
61
import eu.etaxonomy.cdm.model.reference.Reference;
62
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
63
import eu.etaxonomy.cdm.model.taxon.Classification;
64
import eu.etaxonomy.cdm.model.taxon.Synonym;
65
import eu.etaxonomy.cdm.model.taxon.SynonymType;
66
import eu.etaxonomy.cdm.model.taxon.Taxon;
67
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
68
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
69
import eu.etaxonomy.cdm.persistence.dto.UuidAndTitleCache;
70
import eu.etaxonomy.cdm.persistence.query.MatchMode;
71
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
72
import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
73
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
74
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImplRegExBase;
75

    
76
/**
77
 * @author pkelbert
78
 * @date 2 avr. 2013
79
 *
80
 */
81
public class TaxonXTreatmentExtractor extends TaxonXExtractor{
82

    
83
    private static final String PUBLICATION_YEAR = "publicationYear";
84

    
85
	private static final Logger logger = Logger.getLogger(TaxonXTreatmentExtractor.class);
86

    
87
    private static final String notMarkedUp = "Not marked-up";
88
    private static final UUID proIbioTreeUUID = UUID.fromString("2c49f506-c7f7-44de-a8b9-2e695de3769c");
89
    private static final UUID OtherUUID = UUID.fromString("6465f8aa-2175-446f-807e-7163994b120f");
90
    private static final UUID NotMarkedUpUUID = UUID.fromString("796fe3a5-2c9c-4a89-b298-7598ca944063");
91
    private static final boolean skippQuestion = true;
92

    
93
    private final NomenclaturalCode nomenclaturalCode;
94
    private Classification classification;
95

    
96
    private  String treatmentMainName,originalTreatmentName;
97

    
98
    private final HashMap<String,Map<String,String>> namesMap = new HashMap<String, Map<String,String>>();
99

    
100

    
101
    private final Pattern keypattern = Pattern.compile("^(\\d+.*|-\\d+.*)");
102
    private final Pattern keypatternend = Pattern.compile("^.+?\\d$");
103

    
104
    private boolean maxRankRespected =false;
105
    private Map<String, Feature> featuresMap;
106

    
107
    private MyName currentMyName;
108

    
109
    private Reference sourceUrlRef;
110

    
111
    private String followingText;  //text element immediately following a tax:name in tax:nomenclature TODO move do state
112
    private String usedFollowingTextPrefix; //the part of the following text which has been used during taxon name creation
113

    
114
    private final TaxonXAddSources sourceHandler = new TaxonXAddSources();
115

    
116
    /**
     * Creates an extractor and wires the source handler with the given importer,
     * import state and source URL reference.
     *
     * @param nomenclaturalCode the nomenclatural code kept by this extractor
     * @param classification the classification kept as the current classification
     * @param importer the importer; stored, used to obtain the agent service and handed to the source handler
     * @param configState the import state; stored, used to prepare the collectors and handed to the source handler
     * @param featuresMap the features map kept by this extractor (returned by {@link #getFeaturesUsed()})
     * @param urlSource the reference kept as source URL reference and handed to the source handler
     */
    public TaxonXTreatmentExtractor(NomenclaturalCode nomenclaturalCode, Classification classification, TaxonXImport importer,
            TaxonXImportState configState,Map<String, Feature> featuresMap,  Reference urlSource) {
        // plain state first: prepareCollectors below runs after importer and state are set
        this.nomenclaturalCode = nomenclaturalCode;
        this.classification = classification;
        this.importer = importer;
        this.state2 = configState;
        this.featuresMap = featuresMap;
        this.sourceUrlRef = urlSource;

        prepareCollectors(configState, importer.getAgentService());

        // the source handler gets the same reference, importer and state as this extractor
        this.sourceHandler.setSourceUrlRef(this.sourceUrlRef);
        this.sourceHandler.setImporter(importer);
        this.sourceHandler.setConfigState(configState);
    }
135

    
136
    /**
137
     * extracts all the treament information and save them
138
     * @param treatmentnode: the XML Node
139
     * @param tosave: the list of object to save into the CDM
140
     * @param refMods: the reference extracted from the MODS
141
     * @param sourceName: the URI of the document
142
     */
143
    @SuppressWarnings({ "rawtypes", "unused" })
144

    
145
    protected void extractTreatment(Node treatmentnode, Reference refMods, URI sourceName) {        logger.info("extractTreatment");
146
        List<TaxonNameBase> namesToSave = new ArrayList<TaxonNameBase>();
147
        NodeList children = treatmentnode.getChildNodes();
148
        Taxon acceptedTaxon =null;
149
        boolean hasRefgroup=false;
150

    
151
        //needed?
152
        for (int i=0;i<children.getLength();i++){
153
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:ref_group")) {
154
            	hasRefgroup=true;
155
            }
156
        }
157

    
158
        for (int i=0;i<children.getLength();i++){
159
        	Node child = children.item(i);
160
    		acceptedTaxon = handleSingleNode(refMods, sourceName, namesToSave, child, acceptedTaxon);
161
        }
162
        //        logger.info("saveUpdateNames");
163
        if (maxRankRespected){
164
            importer.getNameService().saveOrUpdate(namesToSave);
165
            importer.getClassificationService().saveOrUpdate(classification);
166
            //logger.info("saveUpdateNames-ok");
167
        }
168

    
169
        buildFeatureTree();
170
    }
171

    
172
	private Taxon handleSingleNode(Reference refMods, URI sourceName,
173
			List<TaxonNameBase> namesToSave, Node child, Taxon acceptedTaxon) {
174
		Taxon defaultTaxon =null;
175

    
176
		String nodeName = child.getNodeName();
177
		if (nodeName.equalsIgnoreCase("tax:nomenclature")){
178
		    NodeList nomenclatureChildren = child.getChildNodes();
179
		    boolean containsName = false;
180
		    for(int k=0; k<nomenclatureChildren.getLength(); k++){
181
		        if(nomenclatureChildren.item(k).getNodeName().equalsIgnoreCase("tax:name")){
182
		            containsName=true;
183
		            break;
184
		        }
185
		    }
186
		    if (containsName){
187
		        reloadClassification();
188
		        //extract "main" the scientific name
189
		        try{
190
		            acceptedTaxon = extractNomenclature(child, namesToSave, refMods);
191
		        }catch(ClassCastException e){
192
		        	//FIXME exception handling
193
		        	e.printStackTrace();
194
		        }
195
		        //                    System.out.println("acceptedTaxon : "+acceptedTaxon);
196
		    }
197
		}else if (nodeName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
198
		    reloadClassification();
199
		    //extract the References within the document
200
		    extractReferences(child, namesToSave ,acceptedTaxon,refMods);
201
		}else if (nodeName.equalsIgnoreCase("tax:div") &&
202
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("multiple") && maxRankRespected){
203
		    File file = new File(TaxonXImport.LOG_FOLDER + "multipleTaxonX.txt");
204
		    FileWriter writer;
205
		    try {
206
		        writer = new FileWriter(file ,true);
207
		        writer.write(sourceName+"\n");
208
		        writer.flush();
209
		        writer.close();
210
		    } catch (IOException e1) {
211
		        // TODO Auto-generated catch block
212
		        logger.error(e1.getMessage());
213
		    }
214
		    //                String multiple = askMultiple(children.item(i));
215
		    String multiple = "Other";
216
		    if (multiple.equalsIgnoreCase("other")) {
217
		        extractSpecificFeatureNotStructured(child,acceptedTaxon, defaultTaxon,namesToSave, refMods,multiple);
218
		    }else if (multiple.equalsIgnoreCase("synonyms")) {
219
		        try{
220
		            extractSynonyms(child,acceptedTaxon, refMods, null);
221
		        }catch(NullPointerException e){
222
		            logger.warn("the accepted taxon is maybe null");
223
		        }
224
		    }else if(multiple.equalsIgnoreCase("material examined")){
225
		    	extractMaterials(child, acceptedTaxon, refMods, namesToSave);
226
		    }else if (multiple.equalsIgnoreCase("distribution")){
227
		    	extractDistribution(child, acceptedTaxon, defaultTaxon, namesToSave, refMods);
228
		    }else if (multiple.equalsIgnoreCase("type status")){
229
		    	extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, "TypeStatus");
230
		    }else if (multiple.equalsIgnoreCase("vernacular name")){
231
		    	extractDescriptionWithReference(child, acceptedTaxon, defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
232
		    }else{
233
		    	extractSpecificFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,multiple);
234
		    }
235
		}
236
		else if(nodeName.equalsIgnoreCase("tax:div") &&
237
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("biology_ecology") && maxRankRespected){
238
		    extractFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, Feature.BIOLOGY_ECOLOGY());
239
		}
240
		else if(nodeName.equalsIgnoreCase("tax:div") &&
241
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("vernacularName") && maxRankRespected){
242
		    extractDescriptionWithReference(child, acceptedTaxon,defaultTaxon,refMods, Feature.COMMON_NAME().getTitleCache());
243
		}
244
		else if(nodeName.equalsIgnoreCase("tax:div") &&
245
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("description") && maxRankRespected){
246
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
247
		}
248
		else if(nodeName.equalsIgnoreCase("tax:div") &&
249
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("diagnosis") && maxRankRespected){
250
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,Feature.DIAGNOSIS());
251
		}
252
		else if(nodeName.equalsIgnoreCase("tax:div") &&
253
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("discussion") && maxRankRespected){
254
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DISCUSSION());
255
		}
256
		else if(nodeName.equalsIgnoreCase("tax:div") &&
257
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("note") && maxRankRespected){
258
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, Feature.DESCRIPTION());
259
		}
260
		else if(nodeName.equalsIgnoreCase("tax:div") &&
261
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("distribution") && maxRankRespected){
262
		    extractDistribution(child,acceptedTaxon,defaultTaxon,namesToSave, refMods);
263
		}
264
		else if(nodeName.equalsIgnoreCase("tax:div") &&
265
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("etymology") && maxRankRespected){
266
		    extractFeature(child,acceptedTaxon,defaultTaxon,namesToSave,refMods,Feature.ETYMOLOGY());
267
		}
268
		else if(nodeName.equalsIgnoreCase("tax:div") &&
269
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("materials_examined") && maxRankRespected){
270
		    extractMaterials(child,acceptedTaxon, refMods, namesToSave);
271
		}
272
		else if(nodeName.equalsIgnoreCase("tax:figure") && maxRankRespected){
273
		    extractSpecificFeature(child,acceptedTaxon,defaultTaxon, namesToSave, refMods, "Figure");
274
		}
275
		else if(nodeName.equalsIgnoreCase("tax:div") &&
276
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") && maxRankRespected){
277
		    extractSpecificFeature(child, acceptedTaxon,defaultTaxon, namesToSave, refMods, "table");
278
		}else if(nodeName.equalsIgnoreCase("tax:div") &&
279
				child.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("key") && maxRankRespected){
280
		    //TODO IGNORE keys for the moment
281
		    //extractKey(children.item(i),acceptedTaxon, nameToSave,source, refMods);
282
		    extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods,"Keys - unparsed");
283
		}
284
		else{
285
		    if (! nodeName.equalsIgnoreCase("tax:pb")){
286
		        //logger.info("ANOTHER KIND OF NODES: "+children.item(i).getNodeName()+", "+children.item(i).getAttributes());
287
		        if (child.getAttributes() !=null) {
288
		            logger.info("First Attribute: " + child.getAttributes().item(0));
289
		        }
290
		        extractSpecificFeatureNotStructured(child,acceptedTaxon,defaultTaxon,namesToSave, refMods, notMarkedUp);
291
		    }else{
292
		    	//FIXME
293
		    	logger.warn("Unhandled");
294
		    }
295
		}
296
		return acceptedTaxon;
297
	}
298

    
299

    
300
    protected Map<String,Feature> getFeaturesUsed(){
301
        return featuresMap;
302
    }
303
    /**
304
     *
305
     */
306
    private void buildFeatureTree() {
307
        logger.info("buildFeatureTree");
308
        FeatureTree proibiospheretree = importer.getFeatureTreeService().find(proIbioTreeUUID);
309
        if (proibiospheretree == null){
310
            List<FeatureTree> trees = importer.getFeatureTreeService().list(FeatureTree.class, null, null, null, null);
311
            if (trees.size()==1) {
312
                FeatureTree ft = trees.get(0);
313
                if (featuresMap==null) {
314
                    featuresMap=new HashMap<String, Feature>();
315
                }
316
                for (Feature feature: ft.getDistinctFeatures()){
317
                    if(feature!=null) {
318
                        featuresMap.put(feature.getTitleCache(), feature);
319
                    }
320
                }
321
            }
322
            proibiospheretree = FeatureTree.NewInstance();
323
            proibiospheretree.setUuid(proIbioTreeUUID);
324
        }
325
        //        FeatureNode root = proibiospheretree.getRoot();
326
        FeatureNode root2 = proibiospheretree.getRoot();
327
        if (root2 != null){
328
            int nbChildren = root2.getChildCount()-1;
329
            while (nbChildren>-1){
330
                try{
331
                    root2.removeChild(nbChildren);
332
                }catch(Exception e){logger.warn("Can't remove child from FeatureTree "+e);}
333
                nbChildren --;
334
            }
335

    
336
        }
337

    
338
        for (Feature feature:featuresMap.values()) {
339
            root2.addChild(FeatureNode.NewInstance(feature));
340
        }
341
        importer.getFeatureTreeService().saveOrUpdate(proibiospheretree);
342

    
343
    }
344

    
345

    
346
    /**
347
     * @param keys
348
     * @param acceptedTaxon: the current acceptedTaxon
349
     * @param nametosave: the list of objects to save into the CDM
350
     * @param refMods: the current reference extracted from the MODS
351
     */
352
    /*   @SuppressWarnings("rawtypes")
353
    private void extractKey(Node keys, Taxon acceptedTaxon,List<TaxonNameBase> nametosave, Reference refMods) {
354
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
355

    
356
        NodeList children = keys.getChildNodes();
357
        String key="";
358
        PolytomousKey poly =  PolytomousKey.NewInstance();
359
        poly.addSource(OriginalSourceType.Import, null,null,refMods,null);
360
        poly.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
361
        poly.addTaxonomicScope(acceptedTaxon);
362
        poly.setTitleCache("bloup", true);
363
        //        poly.addCoveredTaxon(acceptedTaxon);
364
        PolytomousKeyNode root = poly.getRoot();
365
        PolytomousKeyNode previous = null,tmpKey=null;
366
        Taxon taxonKey=null;
367
        List<PolytomousKeyNode> polyNodes = new ArrayList<PolytomousKeyNode>();
368

    
369
        //        String fullContent = keys.getTextContent();
370
        for (int i=0;i<children.getLength();i++){
371
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
372
                NodeList paragraph = children.item(i).getChildNodes();
373
                key="";
374
                taxonKey=null;
375
                for (int j=0;j<paragraph.getLength();j++){
376
                    if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
377
                        if (! paragraph.item(j).getTextContent().trim().isEmpty()){
378
                            key+=paragraph.item(j).getTextContent().trim();
379
                            //                            logger.info("KEY: "+j+"--"+key);
380
                        }
381
                    }
382
                    if(paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
383
                        taxonKey=getTaxonFromXML(paragraph.item(j),nametosave,refMods);
384
                    }
385
                }
386
                //                logger.info("keypattern.matcher(key).matches(): "+keypattern.matcher(key).matches());
387
                if (keypattern.matcher(key).matches()){
388
                    tmpKey = PolytomousKeyNode.NewInstance(key);
389
                    if (taxonKey!=null) {
390
                        tmpKey.setTaxon(taxonKey);
391
                    }
392
                    polyNodes.add(tmpKey);
393
                    if (previous == null) {
394
                        root.addChild(tmpKey);
395
                    } else {
396
                        previous.addChild(tmpKey);
397
                    }
398
                }else{
399
                    if (!key.isEmpty()){
400
                        tmpKey=PolytomousKeyNode.NewInstance(key);
401
                        if (taxonKey!=null) {
402
                            tmpKey.setTaxon(taxonKey);
403
                        }
404
                        polyNodes.add(tmpKey);
405
                        if (keypatternend.matcher(key).matches()) {
406
                            root.addChild(tmpKey);
407
                            previous=tmpKey;
408
                        } else{
409
                            previous.addChild(tmpKey);
410
                        }
411

    
412
                    }
413
                }
414
            }
415
        }
416
        importer.getPolytomousKeyNodeService().saveOrUpdate(polyNodes);
417
        importer.getPolytomousKeyService().saveOrUpdate(poly);
418
    }
419
*/
420

    
421

    
422
    /**
423
     * @param taxons: the XML Nodegroup
424
     * @param nametosave: the list of objects to save into the CDM
425
     * @param acceptedTaxon: the current accepted Taxon
426
     * @param refMods: the current reference extracted from the MODS
427
     *
428
     * @return Taxon object built
429
     */
430
    @SuppressWarnings({ "rawtypes", "unused" })
431
    private TaxonNameBase getTaxonNameBaseFromXML(Node taxons, List<TaxonNameBase> nametosave, Reference refMods, boolean isSynonym) {
432
        //        logger.info("getTaxonFromXML");
433
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
434
        logger.info("getTaxonNameBaseFromXML");
435
        TaxonNameBase nameToBeFilled = null;
436

    
437
        currentMyName=new MyName(isSynonym);
438

    
439
        NomenclaturalStatusType statusType = null;
440
        try {
441
        	String followingText = null;  //needs to be checked if following text is possible
442
            currentMyName = extractScientificName(taxons,refMods, null);
443
        } catch (TransformerFactoryConfigurationError e1) {
444
            logger.warn(e1);
445
        } catch (TransformerException e1) {
446
            logger.warn(e1);
447
        }
448
        /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
449

    
450
        nameToBeFilled = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
451
        if (nameToBeFilled.hasProblem() &&
452
                !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
453
            //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
454
            addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
455
            nameToBeFilled=solveNameProblem(currentMyName.getOriginalName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
456
        }
457

    
458
        nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
459
         */
460
        nameToBeFilled = currentMyName.getTaxonNameBase();
461
        return nameToBeFilled;
462

    
463
    }
464

    
465

    
466
    /**
467
     *
468
     */
469
    private void reloadClassification() {
470
        logger.info("reloadClassification");
471
        Classification cl = importer.getClassificationService().find(classification.getUuid());
472
        if (cl != null){
473
            classification = cl;
474
        }else{
475
            importer.getClassificationService().saveOrUpdate(classification);
476
            classification = importer.getClassificationService().find(classification.getUuid());
477
        }
478
    }
479

    
480
    //    /**
481
    //     * Create a Taxon for the current NameBase, based on the current reference
482
    //     * @param taxonNameBase
483
    //     * @param refMods: the current reference extracted from the MODS
484
    //     * @return Taxon
485
    //     */
486
    //    @SuppressWarnings({ "unused", "rawtypes" })
487
    //    private Taxon getTaxon(TaxonNameBase taxonNameBase, Reference refMods) {
488
    //        Taxon t = new Taxon(taxonNameBase,null );
489
    //        if (!configState.getConfig().doKeepOriginalSecundum() || (t.getSec() == null)) {
490
    //            t.setSec(configState.getConfig().getSecundum());
491
    //            logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
492
    //        }
493
    //        /*<<<<<<< .courant
494
    //        boolean sourceExists=false;
495
    //        Set<IdentifiableSource> sources = t.getSources();
496
    //        for (IdentifiableSource src : sources){
497
    //            String micro = src.getCitationMicroReference();
498
    //            Reference r = src.getCitation();
499
    //            if (r.equals(refMods) && micro == null) {
500
    //                sourceExists=true;
501
    //            }
502
    //        }
503
    //        if(!sourceExists) {
504
    //            t.addSource(null,null,refMods,null);
505
    //        }
506
    //=======*/
507
    //        t.addSource(OriginalSourceType.Import,null,null,refMods,null);
508
    //        t.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
509
    //        return t;
510
    //    }
511

    
512
    private void  extractDescriptionWithReference(Node typestatus, Taxon acceptedTaxon, Taxon defaultTaxon, Reference refMods,
513
            String featureName) {
514
        //        System.out.println("extractDescriptionWithReference !");
515
        logger.info("extractDescriptionWithReference");
516
        NodeList children = typestatus.getChildNodes();
517

    
518
        Feature currentFeature=getFeatureObjectFromString(featureName);
519

    
520
        String r="";String s="";
521
        for (int i=0;i<children.getLength();i++){
522
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
523
                s+=children.item(i).getTextContent().trim();
524
            }
525
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
526
                r+= children.item(i).getTextContent().trim();
527
            }
528
            if (s.indexOf(r)>-1) {
529
                s=s.split(r)[0];
530
            }
531
        }
532

    
533
        Reference currentref =  ReferenceFactory.newGeneric();
534
        if(!r.isEmpty()) {
535
            currentref.setTitleCache(r, true);
536
        } else {
537
            currentref=refMods;
538
        }
539
        setParticularDescription(s,acceptedTaxon,defaultTaxon, currentref, refMods,currentFeature);
540
    }
541

    
542
    /**
543
     * @param nametosave
544
     * @param distribution: the XML node group
545
     * @param acceptedTaxon: the current accepted Taxon
546
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
547
     * @param refMods: the current reference extracted from the MODS
548
     */
549
    @SuppressWarnings("rawtypes")
550
    private void extractDistribution(Node distribution, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> nametosave, Reference refMods) {
551
        logger.info("extractDistribution");
552
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
553
        NodeList children = distribution.getChildNodes();
554
        Map<Integer,List<MySpecimenOrObservation>> specimenOrObservations = new HashMap<Integer, List<MySpecimenOrObservation>>();
555
        Map<Integer,String> descriptionsFulltext = new HashMap<Integer,String>();
556

    
557
        for (int i=0;i<children.getLength();i++){
558
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
559
                NodeList paragraph = children.item(i).getChildNodes();
560
                for (int j=0;j<paragraph.getLength();j++){
561
                    if (paragraph.item(j).getNodeName().equalsIgnoreCase("#text")){
562
                        extractText(descriptionsFulltext, i, paragraph.item(j));
563
                    }
564
                    else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:name")){
565
                        extractInLine(nametosave, refMods, descriptionsFulltext, i,paragraph.item(j));
566
                    }
567
                    else if (paragraph.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")){
568
                        MySpecimenOrObservation specimenOrObservation = new MySpecimenOrObservation();
569
                        DerivedUnit derivedUnitBase = null;
570
                        specimenOrObservation = extractSpecimenOrObservation(paragraph.item(j), derivedUnitBase, SpecimenOrObservationType.DerivedUnit, null);
571
                        extractTextFromSpecimenOrObservation(specimenOrObservations, descriptionsFulltext, i, specimenOrObservation);
572
                    }
573
                }
574
            }
575
        }
576

    
577
        int m=0;
578
        for (int k:descriptionsFulltext.keySet()) {
579
            if (k>m) {
580
                m=k;
581
            }
582
        }
583
        for (int k:specimenOrObservations.keySet()) {
584
            if (k>m) {
585
                m=k;
586
            }
587
        }
588

    
589

    
590
        if(acceptedTaxon!=null){
591
            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
592
            Feature currentFeature = Feature.DISTRIBUTION();
593
            //        DerivedUnit derivedUnitBase=null;
594
            //        String descr="";
595
            for (int k=0;k<=m;k++){
596
                if(specimenOrObservations.keySet().contains(k)){
597
                    for (MySpecimenOrObservation soo:specimenOrObservations.get(k) ) {
598
                        handleAssociation(acceptedTaxon, refMods, td, soo);
599
                    }
600
                }
601

    
602
                if (descriptionsFulltext.keySet().contains(k)){
603
                    if (!stringIsEmpty(descriptionsFulltext.get(k).trim()) && (descriptionsFulltext.get(k).startsWith("Hab.") || descriptionsFulltext.get(k).startsWith("Habitat"))){
604
                        setParticularDescription(descriptionsFulltext.get(k),acceptedTaxon,defaultTaxon, refMods, Feature.HABITAT());
605
                        break;
606
                    }
607
                    else{
608
                        handleTextData(refMods, descriptionsFulltext, td, currentFeature, k);
609
                    }
610
                }
611

    
612
                if (descriptionsFulltext.keySet().contains(k) || specimenOrObservations.keySet().contains(k)){
613
                    acceptedTaxon.addDescription(td);
614
                    sourceHandler.addAndSaveSource(refMods, td, null);
615
                    importer.getTaxonService().saveOrUpdate(acceptedTaxon);
616
                }
617
            }
618
        }
619
    }
620

    
621
    /**
622
     * @param refMods
623
     * @param descriptionsFulltext
624
     * @param td
625
     * @param currentFeature
626
     * @param k
627
     */
628
    private void handleTextData(Reference refMods, Map<Integer, String> descriptionsFulltext, TaxonDescription td,
629
            Feature currentFeature, int k) {
630
        //logger.info("handleTextData");
631
        TextData textData = TextData.NewInstance();
632
        textData.setFeature(currentFeature);
633
        textData.putText(Language.UNKNOWN_LANGUAGE(), descriptionsFulltext.get(k));
634
        sourceHandler.addSource(refMods, textData);
635
        td.addElement(textData);
636
    }
637

    
638
    /**
639
     * @param acceptedTaxon
640
     * @param refMods
641
     * @param td
642
     * @param soo
643
     */
644
    private void handleAssociation(Taxon acceptedTaxon, Reference refMods, TaxonDescription td, MySpecimenOrObservation soo) {
645
        logger.info("handleAssociation");
646
        String descr=soo.getDescr();
647
        DerivedUnit derivedUnitBase = soo.getDerivedUnitBase();
648

    
649
        sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
650

    
651
        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
652

    
653
        Feature feature=null;
654
        feature = makeFeature(derivedUnitBase);
655
        if(!StringUtils.isEmpty(descr)) {
656
            derivedUnitBase.setTitleCache(descr, true);
657
        }
658

    
659
        IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
660

    
661
        taxonDescription.addElement(indAssociation);
662
        sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
663
        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
664
        td.setDescribedSpecimenOrObservation(soo.getDerivedUnitBase());
665
    }
666

    
667
    /**
668
     * create an individualAssociation
669
     * @param refMods
670
     * @param derivedUnitBase
671
     * @param feature
672
     * @return
673
     */
674
    private IndividualsAssociation createIndividualAssociation(Reference refMods, DerivedUnit derivedUnitBase,
675
            Feature feature) {
676
        logger.info("createIndividualAssociation");
677
        IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
678
        indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
679
        indAssociation.setFeature(feature);
680
        indAssociation = sourceHandler.addSource(refMods, indAssociation);
681
        return indAssociation;
682
    }
683

    
684
    /**
685
     * @param specimenOrObservations
686
     * @param descriptionsFulltext
687
     * @param i
688
     * @param specimenOrObservation
689
     */
690
    private void extractTextFromSpecimenOrObservation(Map<Integer, List<MySpecimenOrObservation>> specimenOrObservations,
691
            Map<Integer, String> descriptionsFulltext, int i, MySpecimenOrObservation specimenOrObservation) {
692
        logger.info("extractTextFromSpecimenOrObservation");
693
        List<MySpecimenOrObservation> speObsList = specimenOrObservations.get(i);
694
        if (speObsList == null) {
695
            speObsList=new ArrayList<MySpecimenOrObservation>();
696
        }
697
        speObsList.add(specimenOrObservation);
698
        specimenOrObservations.put(i,speObsList);
699

    
700
        String s = specimenOrObservation.getDerivedUnitBase().toString();
701
        if (descriptionsFulltext.get(i) !=null){
702
            s = descriptionsFulltext.get(i)+" "+s;
703
        }
704
        descriptionsFulltext.put(i, s);
705
    }
706

    
707
    /**
708
     * Extract the text with the inline link to a taxon
709
     * @param nametosave
710
     * @param refMods
711
     * @param descriptionsFulltext
712
     * @param i
713
     * @param paragraph
714
     */
715
    @SuppressWarnings("rawtypes")
716
    private void extractInLine(List<TaxonNameBase> nametosave, Reference refMods, Map<Integer, String> descriptionsFulltext,
717
            int i, Node paragraph) {
718
        //logger.info("extractInLine");
719
        String inLine=getInlineTextForName(nametosave, refMods, paragraph);
720
        if (descriptionsFulltext.get(i) !=null){
721
            inLine = descriptionsFulltext.get(i)+inLine;
722
        }
723
        descriptionsFulltext.put(i, inLine);
724
    }
725

    
726
    /**
727
     * Extract the raw text from a Node
728
     * @param descriptionsFulltext
729
     * @param node
730
     * @param j
731
     */
732
    private void extractText(Map<Integer, String> descriptionsFulltext, int i, Node node) {
733
        //logger.info("extractText");
734
        if(!node.getTextContent().trim().isEmpty()) {
735
            String s =node.getTextContent().trim();
736
            if (descriptionsFulltext.get(i) !=null){
737
                s = descriptionsFulltext.get(i)+" "+s;
738
            }
739
            descriptionsFulltext.put(i, s);
740
        }
741
    }
742

    
743

    
744
    /**
745
     * @param materials: the XML node group
746
     * @param acceptedTaxon: the current accepted Taxon
747
     * @param refMods: the current reference extracted from the MODS
748
     */
749
    @SuppressWarnings("rawtypes")
750
    private void extractMaterials(Node materials, Taxon acceptedTaxon, Reference refMods,List<TaxonNameBase> nametosave) {
751
        logger.info("EXTRACTMATERIALS");
752
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
753
        NodeList children = materials.getChildNodes();
754
        NodeList events = null;
755
        //        String descr="";
756

    
757

    
758
        for (int i=0;i<children.getLength();i++){
759
            String rawAssociation="";
760
            boolean added=false;
761
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
762
                events = children.item(i).getChildNodes();
763
                for(int k=0;k<events.getLength();k++){
764
                    if (events.item(k).getNodeName().equalsIgnoreCase("tax:name")){
765
                        String inLine= getInlineTextForName(nametosave, refMods, events.item(k));
766
                        if(!inLine.isEmpty()) {
767
                            rawAssociation+=inLine;
768
                        }
769
                    }
770
                    if (! events.item(k).getNodeName().equalsIgnoreCase("tax:name")
771
                            && !events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
772
                        rawAssociation+= events.item(k).getTextContent().trim();
773
                    }
774
                    if(events.item(k).getNodeName().equalsIgnoreCase("tax:collection_event")){
775
                        if (!containsDistinctLetters(rawAssociation.replaceAll(";",""))) {
776
                            rawAssociation="no description text";
777
                        }
778
                        added=true;
779
                        handleDerivedUnitFacadeAndBase(acceptedTaxon, refMods, events.item(k), rawAssociation);
780
                    }
781
                    if (!rawAssociation.isEmpty() && !added){
782

    
783
                        Feature feature = Feature.MATERIALS_EXAMINED();
784
                        featuresMap.put(feature.getTitleCache(),feature);
785

    
786
                        TextData textData = createTextData(rawAssociation, refMods, feature);
787

    
788
                        if(! rawAssociation.isEmpty() && (acceptedTaxon!=null)){
789
                            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
790
                            td.addElement(textData);
791
                            acceptedTaxon.addDescription(td);
792
                            sourceHandler.addAndSaveSource(refMods, td, null);
793
                        }
794
                        //                        DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
795
                        //                        derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
796
                        //
797
                        //                        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
798
                        //                        acceptedTaxon.addDescription(taxonDescription);
799
                        //
800
                        //                        IndividualsAssociation indAssociation = IndividualsAssociation.NewInstance();
801
                        //
802
                        //                        Feature feature = Feature.MATERIALS_EXAMINED();
803
                        //                        featuresMap.put(feature.getTitleCache(),feature);
804
                        //                        if(!StringUtils.isEmpty(rawAssociation)) {
805
                        //                            derivedUnitBase.setTitleCache(rawAssociation, true);
806
                        //                        }
807
                        //                        indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
808
                        //                        indAssociation.setFeature(feature);
809
                        //                        indAssociation.addSource(OriginalSourceType.Import, null, null, refMods, null);
810
                        //
811
                        //                        /*boolean sourceExists=false;
812
                        //                        Set<DescriptionElementSource> dsources = indAssociation.getSources();
813
                        //                        for (DescriptionElementSource src : dsources){
814
                        //                            String micro = src.getCitationMicroReference();
815
                        //                            Reference r = src.getCitation();
816
                        //                            if (r.equals(refMods) && micro == null) {
817
                        //                                sourceExists=true;
818
                        //                            }
819
                        //                        }
820
                        //                        if(!sourceExists) {
821
                        //                            indAssociation.addSource(null, null, refMods, null);
822
                        //                        }*/
823
                        //                        taxonDescription.addElement(indAssociation);
824
                        //                        taxonDescription.setTaxon(acceptedTaxon);
825
                        //                        taxonDescription.addSource(OriginalSourceType.Import, null,null,refMods,null);
826
                        //
827
                        //                        /*sourceExists=false;
828
                        //                        Set<IdentifiableSource> sources = taxonDescription.getSources();
829
                        //                        for (IdentifiableSource src : sources){
830
                        //                            String micro = src.getCitationMicroReference();
831
                        //                            Reference r = src.getCitation();
832
                        //                            if (r.equals(refMods) && micro == null) {
833
                        //                                sourceExists=true;
834
                        //                            }
835
                        //                        }
836
                        //                        if(!sourceExists) {
837
                        //                            taxonDescription.addSource(OriginalSourceType.Import,null,null,refMods,null);
838
                        //                        }*/
839
                        //
840
                        //                        importer.getDescriptionService().saveOrUpdate(taxonDescription);
841
                        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
842

    
843
                        rawAssociation="";
844
                    }
845
                }
846
            }
847
        }
848
    }
849

    
850
    /**
851
     * @param acceptedTaxon
852
     * @param refMods
853
     * @param events
854
     * @param rawAssociation
855
     * @param k
856
     */
857
    private void handleDerivedUnitFacadeAndBase(Taxon acceptedTaxon, Reference refMods, Node event,
858
            String rawAssociation) {
859
        logger.info("handleDerivedUnitFacadeAndBase");
860
        String descr;
861
        DerivedUnit derivedUnitBase;
862
        MySpecimenOrObservation myspecimenOrObservation;
863
        DerivedUnitFacade derivedUnitFacade = getFacade(rawAssociation.replaceAll(";",""),SpecimenOrObservationType.DerivedUnit);
864
        derivedUnitBase = derivedUnitFacade.innerDerivedUnit();
865

    
866
        sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
867

    
868
        //TODO this may not always be correct, ask user
869
        TaxonNameBase<?,?> typifiableName = acceptedTaxon != null ?  acceptedTaxon.getName() : null;
870
        myspecimenOrObservation = extractSpecimenOrObservation(event,derivedUnitBase,SpecimenOrObservationType.DerivedUnit, typifiableName);
871
        derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
872
        descr=myspecimenOrObservation.getDescr();
873

    
874
        sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
875

    
876
        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
877

    
878
        Feature feature = makeFeature(derivedUnitBase);
879
        featuresMap.put(feature.getTitleCache(),feature);
880
        if(!StringUtils.isEmpty(descr)) {
881
            derivedUnitBase.setTitleCache(descr, true);
882
        }
883

    
884
        IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
885

    
886
        taxonDescription.addElement(indAssociation);
887
        sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
888
        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
889
    }
890

    
891

    
892

    
893
    /**
894
     * @param currentName
895
     * @param materials: the XML node group
896
     * @param acceptedTaxon: the current accepted Taxon
897
     * @param refMods: the current reference extracted from the MODS
898
     */
899
    private String extractMaterialsDirect(Node materials, Taxon acceptedTaxon, Reference refMods, String event, TaxonNameBase<?,?> currentName) {
900
        logger.info("extractMaterialsDirect");
901
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
902
        String descr="";
903

    
904
        DerivedUnit derivedUnitBase=null;
905
        MySpecimenOrObservation myspecimenOrObservation = extractSpecimenOrObservation(materials,derivedUnitBase, SpecimenOrObservationType.DerivedUnit, currentName);
906
        derivedUnitBase = myspecimenOrObservation.getDerivedUnitBase();
907

    
908
        sourceHandler.addAndSaveSource(refMods, derivedUnitBase);
909

    
910
        TaxonDescription taxonDescription = importer.getTaxonDescription(acceptedTaxon, false, true);
911

    
912
        Feature feature=null;
913
        if (event.equalsIgnoreCase("collection")){
914
            feature = makeFeature(derivedUnitBase);
915
        }
916
        else{
917
            feature = Feature.MATERIALS_EXAMINED();
918
        }
919
        featuresMap.put(feature.getTitleCache(),  feature);
920

    
921
        descr=myspecimenOrObservation.getDescr();
922
        if(!StringUtils.isEmpty(descr)) {
923
            derivedUnitBase.setTitleCache(descr, true);
924
        }
925

    
926
        IndividualsAssociation indAssociation = createIndividualAssociation(refMods, derivedUnitBase, feature);
927

    
928
        taxonDescription.addElement(indAssociation);
929
        sourceHandler.addAndSaveSource(refMods, taxonDescription,null);
930
        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
931

    
932
        return derivedUnitBase.getTitleCache();
933

    
934
    }
935

    
936

    
937
    /**
938
     * @param description: the XML node group
939
     * @param acceptedTaxon: the current acceptedTaxon
940
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
941
     * @param nametosave: the list of objects to save into the CDM
942
     * @param refMods: the current reference extracted from the MODS
943
     * @param featureName: the feature name
944
     */
945
    @SuppressWarnings({ "rawtypes"})
946
    private String extractSpecificFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
947
            List<TaxonNameBase> nametosave, Reference refMods, String featureName ) {
948
        logger.info("extractSpecificFeature "+featureName);
949
        //        System.out.println("GRUUUUuu");
950
        NodeList children = description.getChildNodes();
951
        NodeList insideNodes ;
952
        NodeList trNodes;
953
        //        String descr ="";
954
        String localdescr="";
955
        List<String> blabla=null;
956
        List<String> text = new ArrayList<String>();
957

    
958
        String table="<table>";
959
        String head="";
960
        String line="";
961

    
962
        Feature currentFeature=getFeatureObjectFromString(featureName);
963

    
964
        //        String fullContent = description.getTextContent();
965
        for (int i=0;i<children.getLength();i++){
966
            //            localdescr="";
967
            if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
968
                text.add(children.item(i).getTextContent().trim());
969
            }
970
            if (featureName.equalsIgnoreCase("table")){
971
                if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
972
                        children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("thead")){
973
                    head = extractTableHead(children.item(i));
974
                    table+=head;
975
                    line = extractTableLine(children.item(i));
976
                    if (!line.equalsIgnoreCase("<tr></tr>")) {
977
                        table+=line;
978
                    }
979
                }
980
                if (children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
981
                        children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
982
                    line = extractTableLineWithColumn(children.item(i).getChildNodes());
983
                    if(!line.equalsIgnoreCase("<tr></tr>")) {
984
                        table+=line;
985
                    }
986
                }
987
            }
988
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
989
                insideNodes=children.item(i).getChildNodes();
990
                blabla= new ArrayList<String>();
991
                for (int j=0;j<insideNodes.getLength();j++){
992
                    Node insideNode = insideNodes.item(j);
993
                	if (insideNode.getNodeName().equalsIgnoreCase("tax:name")){
994
                        String inlinetext = getInlineTextForName(nametosave, refMods, insideNode);
995
                        if (!inlinetext.isEmpty()) {
996
                            blabla.add(inlinetext);
997
                        }
998
                    }
999
                    else if (insideNode.getNodeName().equalsIgnoreCase("#text")) {
1000
                        if(!insideNode.getTextContent().trim().isEmpty()){
1001
                            blabla.add(insideNode.getTextContent().trim());
1002
                            //                            localdescr += insideNodes.item(j).getTextContent().trim();
1003
                        }
1004
                    }
1005
                }
1006
                if (!blabla.isEmpty()) {
1007
                    String blaStr = StringUtils.join(blabla," ").trim();
1008
                    if(!stringIsEmpty(blaStr)) {
1009
                        setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1010
                        text.add(blaStr);
1011
                    }
1012
                }
1013

    
1014
            }
1015
            if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1016
                if(!children.item(i).getTextContent().trim().isEmpty()){
1017
                    localdescr = children.item(i).getTextContent().trim();
1018
                    if(!stringIsEmpty(localdescr)) {
1019
                        setParticularDescription(localdescr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1020
                    }
1021
                }
1022
            }
1023
        }
1024

    
1025
        table+="</table>";
1026
        if (!table.equalsIgnoreCase("<table></table>")){
1027
            //            System.out.println("TABLE : "+table);
1028
            text.add(table);
1029
        }
1030

    
1031
        if (text !=null && !text.isEmpty()) {
1032
            return StringUtils.join(text," ");
1033
        } else {
1034
            return "";
1035
        }
1036

    
1037
    }
1038

    
1039
    /**
1040
     * @param children
1041
     * @param i
1042
     * @return
1043
     */
1044
    private String extractTableLine(Node child) {
1045
        //logger.info("extractTableLine");
1046
        String line;
1047
        line="<tr>";
1048
        if (child.getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1049
            line = extractTableLineWithColumn(child.getChildNodes());
1050
        }
1051
        line+="</tr>";
1052
        return line;
1053
    }
1054

    
1055
    /**
1056
     * @param children
1057
     * @param i
1058
     * @return
1059
     */
1060
    private String extractTableHead(Node child) {
1061
        //logger.info("extractTableHead");
1062
        String head;
1063
        String line;
1064
        head="<th>";
1065
        NodeList trNodes = child.getChildNodes();
1066
        for (int k=0;k<trNodes.getLength();k++){
1067
            if (trNodes.item(k).getNodeName().equalsIgnoreCase("tax:div")
1068
                    && trNodes.item(k).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("tr")){
1069
                line = extractTableLineWithColumn(trNodes.item(k).getChildNodes());
1070
                head+=line;
1071
            }
1072
        }
1073
        head+="</th>";
1074
        return head;
1075
    }
1076

    
1077
    /**
1078
     * build a html table line, with td columns
1079
     * @param tdNodes
1080
     * @return an html coded line
1081
     */
1082
    private String extractTableLineWithColumn(NodeList tdNodes) {
1083
        //logger.info("extractTableLineWithColumn");
1084
        String line;
1085
        line="<tr>";
1086
        for (int l=0;l<tdNodes.getLength();l++){
1087
            if (tdNodes.item(l).getNodeName().equalsIgnoreCase("tax:p")){
1088
                line+="<td>"+tdNodes.item(l).getTextContent()+"</td>";
1089
            }
1090
        }
1091
        line+="</tr>";
1092
        return line;
1093
    }
1094

    
1095
    /**
1096
     * @param description: the XML node group
1097
     * @param acceptedTaxon: the current acceptedTaxon
1098
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1099
     * @param nametosave: the list of objects to save into the CDM
1100
     * @param refMods: the current reference extracted from the MODS
1101
     * @param featureName: the feature name
1102
     */
1103
    @SuppressWarnings({ "unused", "rawtypes" })
1104
    private String extractSpecificFeatureNotStructured(Node description, Taxon acceptedTaxon, Taxon defaultTaxon,
1105
            List<TaxonNameBase> nameToSave, Reference refMods, String featureName ) {
1106
        logger.info("extractSpecificFeatureNotStructured " + featureName);
1107
        NodeList children = description.getChildNodes();
1108
        NodeList insideNodes ;
1109
        List<String> blabla= new ArrayList<String>();
1110

    
1111

    
1112
        Feature currentFeature = getFeatureObjectFromString(featureName);
1113

    
1114
        String fullContent = description.getTextContent();
1115
        for (int i=0;i<children.getLength();i++){
1116
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1117
                insideNodes=children.item(i).getChildNodes();
1118
                for (int j=0;j<insideNodes.getLength();j++){
1119
                    if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1120
                        String inlineText =getInlineTextForName(nameToSave, refMods, insideNodes.item(j));
1121
                        if(!inlineText.isEmpty()) {
1122
                            blabla.add(inlineText);
1123
                        }
1124
                    }
1125
                    if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1126
                        if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1127
                            blabla.add(insideNodes.item(j).getTextContent().trim());
1128
                        }
1129
                    }
1130
                }
1131
            }
1132
            if (children.item(i).getNodeName().equalsIgnoreCase("#text")){
1133
                if(!children.item(i).getTextContent().trim().isEmpty()){
1134
                    String localdescr = children.item(i).getTextContent().trim();
1135
                    if(!localdescr.isEmpty())
1136
                    {
1137
                        blabla.add(localdescr);
1138
                    }
1139
                }
1140
            }
1141
        }
1142

    
1143
        if (blabla !=null && !blabla.isEmpty()) {
1144
            String blaStr = StringUtils.join(blabla," ").trim();
1145
            if (! stringIsEmpty(blaStr)) {
1146
                setParticularDescription(blaStr,acceptedTaxon,defaultTaxon, refMods,currentFeature);
1147
                return blaStr;
1148
            } else {
1149
                return "";
1150
            }
1151
        } else {
1152
            return "";
1153
        }
1154

    
1155
    }
1156

    
1157
    /**
1158
     * @param blaStr
1159
     * @return
1160
     */
1161
    private boolean stringIsEmpty(String blaStr) {
1162
        if (blaStr.matches("(\\.|,|;|\\.-)?")){
1163
        	return true;
1164
        }else{
1165
        	return false;
1166
        }
1167
    }
1168

    
1169
    /**
1170
     * @param nametosave
1171
     * @param refMods
1172
     * @param insideNodes
1173
     * @param blabla
1174
     * @param j
1175
     */
1176
    @SuppressWarnings({ "rawtypes" })
1177
    private String getInlineTextForName(List<TaxonNameBase> nametosave, Reference refMods, Node insideNode) {
1178
        if (true){
1179
        	NodeList children = insideNode.getChildNodes();
1180
        	String result = "";
1181
            for (int i=0;i<children.getLength();i++){
1182
            	Node nameChild = children.item(i);
1183
                if(nameChild.getNodeName().equalsIgnoreCase("#text")){
1184
                	result += nameChild.getTextContent();
1185
                }else{
1186
                	//do nothing
1187
                }
1188
            }
1189
        	return result.replace("\n", "").trim();
1190
        }else{
1191
	    	TaxonNameBase tnb = getTaxonNameBaseFromXML(insideNode, nametosave,refMods,false);
1192
	        //                        Taxon tax = getTaxonFromTxonNameBase(tnb, refMods);
1193
	        Taxon tax = currentMyName.getTaxon();
1194
	        if(tnb !=null && tax != null){
1195
	            String linkedTaxon = tnb.getTitleCache().split("sec")[0];//TODO NOT IMPLEMENTED IN THE CDM YET
1196
	            return "<cdm:taxon uuid='"+tax.getUuid()+"'>"+linkedTaxon+"</cdm:taxon>";
1197
	        }else if (tnb != null && tax == null){
1198
	        	//TODO
1199
	        	return "<cdm:taxonName uuid='" + tnb.getUuid() +"'>" + tnb.getTitleCache().split("sec")[0]  +"</cdm:taxonName>";
1200
	        }else{
1201
	        	logger.warn("Inline text has no content yet");
1202
	        }
1203
	        return "";
1204
        }
1205
    }
1206

    
1207
    /**
1208
     * @param featureName
1209
     * @return
1210
     */
1211
    @SuppressWarnings("rawtypes")
1212
    private Feature getFeatureObjectFromString(String featureName) {
1213
        logger.info("getFeatureObjectFromString");
1214
        List<Feature> features = importer.getTermService().list(Feature.class, null,null,null,null);
1215
        Feature currentFeature=null;
1216
        for (Feature feature: features){
1217
            String tmpF = feature.getTitleCache();
1218
            if (tmpF.equalsIgnoreCase(featureName)) {
1219
                currentFeature=feature;
1220
                //                System.out.println("currentFeatureFromList "+currentFeature.getUuid());
1221
            }
1222
        }
1223
        if (currentFeature == null) {
1224
            currentFeature=Feature.NewInstance(featureName, featureName, featureName);
1225
            if(featureName.equalsIgnoreCase("Other")){
1226
                currentFeature.setUuid(OtherUUID);
1227
            }
1228
            if(featureName.equalsIgnoreCase(notMarkedUp)){
1229
                currentFeature.setUuid(NotMarkedUpUUID);
1230
            }
1231
            importer.getTermService().saveOrUpdate(currentFeature);
1232
        }
1233
        return currentFeature;
1234
    }
1235

    
1236

    
1237

    
1238

    
1239
    /**
1240
     * @param children: the XML node group
1241
     * @param nametosave: the list of objects to save into the CDM
1242
     * @param acceptedTaxon: the current acceptedTaxon
1243
     * @param refMods: the current reference extracted from the MODS
1244
     * @param fullContent :the parsed XML content
1245
     * @return a list of description (text)
1246
     */
1247
    @SuppressWarnings({ "unused", "rawtypes" })
1248
    private List<String> parseParagraph(List<TaxonNameBase> namesToSave, Taxon acceptedTaxon, Reference refMods, Node paragraph, Feature feature){
1249
        logger.info("parseParagraph "+feature.toString());
1250
        List<String> fullDescription=  new ArrayList<String>();
1251
        //        String localdescr;
1252
        String descr="";
1253
        NodeList insideNodes ;
1254
        boolean collectionEvent = false;
1255
        List<Node>collectionEvents = new ArrayList<Node>();
1256

    
1257
        NodeList children = paragraph.getChildNodes();
1258

    
1259
        for (int i=0;i<children.getLength();i++){
1260
            //            localdescr="";
1261
            if (children.item(i).getNodeName().equalsIgnoreCase("#text") && !children.item(i).getTextContent().trim().isEmpty()){
1262
                descr += children.item(i).getTextContent().trim();
1263
            }
1264
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1265
                insideNodes=children.item(i).getChildNodes();
1266
                List<String> blabla= new ArrayList<String>();
1267
                for (int j=0;j<insideNodes.getLength();j++){
1268
                    boolean nodeKnown = false;
1269
                    //    System.out.println("insideNodes.item(j).getNodeName() : "+insideNodes.item(j).getNodeName());
1270
                    if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1271
                        String inlineText = getInlineTextForName(namesToSave, refMods, insideNodes.item(j));
1272
                        if (!inlineText.isEmpty()) {
1273
                            blabla.add(inlineText);
1274
                        }
1275
                        nodeKnown=true;
1276
                    }
1277
                    else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("#text")) {
1278
                        if(!insideNodes.item(j).getTextContent().trim().isEmpty()){
1279
                            blabla.add(insideNodes.item(j).getTextContent().trim());
1280
                            // localdescr += insideNodes.item(j).getTextContent().trim();
1281
                        }
1282
                        nodeKnown=true;
1283
                    }
1284
                    else if (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:bibref")) {
1285
                        String ref = insideNodes.item(j).getTextContent().trim();
1286
                        if (ref.endsWith(";")  && ((ref.length())>1)) {
1287
                            ref=ref.substring(0, ref.length()-1)+".";
1288
                        }
1289
                        Reference reference = ReferenceFactory.newGeneric();
1290
                        reference.setTitleCache(ref, true);
1291
                        blabla.add(reference.getTitleCache());
1292
                        nodeKnown=true;
1293
                    }
1294
                    else if  (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:figure")){
1295
                        String figure = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "figure");
1296
                        blabla.add(figure);
1297
                    }
1298
                    else if(insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:div") &&
1299
                            insideNodes.item(j).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1300
                            insideNodes.item(j).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1301
                        String table = extractSpecificFeature(insideNodes.item(j),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1302
                        blabla.add(table);
1303
                    }
1304
                    else if  (insideNodes.item(j).getNodeName().equalsIgnoreCase("tax:collection_event")) {
1305
                        //                        logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1306
                        String titlecache  = extractMaterialsDirect(insideNodes.item(j), acceptedTaxon, refMods, "collection", null);
1307
                        blabla.add(titlecache);
1308
                        collectionEvent=true;
1309
                        collectionEvents.add(insideNodes.item(j));
1310
                        nodeKnown=true;
1311
                    }else{
1312
                    	logger.warn("node not handled yet: " + insideNodes.item(j).getNodeName());
1313
                    }
1314

    
1315
                }
1316
                if (!StringUtils.isBlank(StringUtils.join(blabla," "))) {
1317
                    fullDescription.add(StringUtils.join(blabla," "));
1318
                }
1319
            }
1320
            if  (children.item(i).getNodeName().equalsIgnoreCase("tax:figure")){
1321
                String figure = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "Figure");
1322
                fullDescription.add(figure);
1323
            }
1324
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:div") &&
1325
                    children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("Other") &&
1326
                    children.item(i).getAttributes().getNamedItem("otherType").getNodeValue().equalsIgnoreCase("table")){
1327
                String table = extractSpecificFeature(children.item(i),acceptedTaxon,acceptedTaxon, namesToSave, refMods, "table");
1328
                fullDescription.add(table);
1329
            }
1330
        }
1331

    
1332
        if( !stringIsEmpty(descr.trim())){
1333
            Feature currentFeature= getNotMarkedUpFeatureObject();
1334
            setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1335
        }
1336
        //        if (collectionEvent) {
1337
        //            logger.warn("SEEMS TO BE COLLECTION EVENT INSIDE A "+feature.toString());
1338
        //            for (Node coll:collectionEvents){
1339
        //                = extractMaterialsDirect(coll, acceptedTaxon, refMods, "collection");
1340
        //            }
1341
        //        }
1342
        return fullDescription;
1343
    }
1344

    
1345

    
1346
    /**
1347
     * @param description: the XML node group
1348
     * @param acceptedTaxon: the current acceptedTaxon
1349
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1350
     * @param nametosave: the list of objects to save into the CDM
1351
     * @param refMods: the current reference extracted from the MODS
1352
     * @param feature: the feature to link the data with
1353
     */
1354
    @SuppressWarnings("rawtypes")
1355
    private void extractFeature(Node description, Taxon acceptedTaxon, Taxon defaultTaxon, List<TaxonNameBase> namesToSave, Reference refMods, Feature feature){
1356
        logger.info("EXTRACT FEATURE "+feature.toString());
1357
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1358
        List<String> fullDescription= parseParagraph( namesToSave, acceptedTaxon, refMods, description,feature);
1359

    
1360
        //        System.out.println("Feature : "+feature.toString()+", "+fullDescription.toString());
1361
        if (!fullDescription.isEmpty() &&!stringIsEmpty(StringUtils.join(fullDescription,"\n").trim())) {
1362
            setParticularDescription(StringUtils.join(fullDescription,"\n").trim(),acceptedTaxon,defaultTaxon, refMods,feature);
1363
        }
1364

    
1365
    }
1366

    
1367

    
1368
    /**
1369
     * @param descr: the XML Nodegroup to parse
1370
     * @param acceptedTaxon: the current acceptedTaxon
1371
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1372
     * @param refMods: the current reference extracted from the MODS
1373
     * @param currentFeature: the feature name
1374
     * @return
1375
     */
1376
    private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon, Reference refMods, Feature currentFeature) {
1377
        logger.info("setParticularDescription " + currentFeature.getTitleCache()+", \n blabla : "+descr);
1378

    
1379
        //remove redundant feature title
1380
        String featureStr = currentFeature.getTitleCache();
1381
        if (!descr.isEmpty() && descr.toLowerCase().startsWith(featureStr.toLowerCase())){
1382
        	descr = descr.replaceAll("(?i)" + featureStr + "\\.\\s*", "");
1383
        }
1384

    
1385

    
1386
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1387
        featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1388

    
1389
        TextData textData = createTextData(descr, refMods, currentFeature);
1390

    
1391
        if(acceptedTaxon!=null){
1392
            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1393
            td.addElement(textData);
1394
            acceptedTaxon.addDescription(td);
1395

    
1396
            sourceHandler.addAndSaveSource(refMods, td, null);
1397
            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1398
        }
1399

    
1400
        if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1401
            try{
1402
                Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1403
                if (tmp!=null) {
1404
                    defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1405
                }else{
1406
                    importer.getTaxonService().saveOrUpdate(defaultTaxon);
1407
                }
1408
            }catch(Exception e){
1409
                logger.debug("TAXON EXISTS"+defaultTaxon);
1410
            }
1411

    
1412
            TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1413
            defaultTaxon.addDescription(td);
1414
            td.addElement(textData);
1415
            sourceHandler.addAndSaveSource(refMods, td, null);
1416
            importer.getTaxonService().saveOrUpdate(defaultTaxon);
1417
        }
1418
    }
1419

    
1420
    /**
1421
     * @param descr
1422
     * @param refMods
1423
     * @param currentFeature
1424
     * @return
1425
     */
1426
    private TextData createTextData(String descr, Reference refMods, Feature currentFeature) {
1427
        //logger.info("createTextData");
1428
        TextData textData = TextData.NewInstance();
1429
        textData.setFeature(currentFeature);
1430
        sourceHandler.addSource(refMods, textData);
1431

    
1432
        textData.putText(Language.UNKNOWN_LANGUAGE(), descr);
1433
        return textData;
1434
    }
1435

    
1436

    
1437

    
1438
    /**
1439
     * @param descr: the XML Nodegroup to parse
1440
     * @param acceptedTaxon: the current acceptedTaxon
1441
     * @param defaultTaxon: the current defaultTaxon, only used if there is no accepted name
1442
     * @param refMods: the current reference extracted from the MODS
1443
     * @param currentFeature: the feature name
1444
     * @return
1445
     */
1446
    private void setParticularDescription(String descr, Taxon acceptedTaxon, Taxon defaultTaxon,Reference currentRef, Reference refMods, Feature currentFeature) {
1447
        //        System.out.println("setParticularDescriptionSPecial "+currentFeature);
1448
        //        logger.info("acceptedTaxon: "+acceptedTaxon);
1449
        logger.info("setParticularDescription");
1450
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1451

    
1452
        featuresMap.put(currentFeature.getTitleCache(),currentFeature);
1453
        TextData textData = createTextData(descr, refMods, currentFeature);
1454

    
1455
        if(! descr.isEmpty() && (acceptedTaxon!=null)){
1456
            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1457
            td.addElement(textData);
1458
            acceptedTaxon.addDescription(td);
1459

    
1460
            sourceHandler.addAndSaveSource(refMods, td, currentRef);
1461
            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1462
        }
1463

    
1464
        if(! descr.isEmpty() && (acceptedTaxon == null) && (defaultTaxon != null)){
1465
            try{
1466
                Taxon tmp =(Taxon) importer.getTaxonService().find(defaultTaxon.getUuid());
1467
                if (tmp!=null) {
1468
                    defaultTaxon=CdmBase.deproxy(tmp,Taxon.class);
1469
                }else{
1470
                    importer.getTaxonService().saveOrUpdate(defaultTaxon);
1471
                }
1472
            }catch(Exception e){
1473
                logger.debug("TAXON EXISTS"+defaultTaxon);
1474
            }
1475

    
1476
            TaxonDescription td =importer.getTaxonDescription(defaultTaxon, false, true);
1477
            defaultTaxon.addDescription(td);
1478
            td.addElement(textData);
1479
            sourceHandler.addAndSaveSource(currentRef, td,currentRef);
1480
            importer.getTaxonService().saveOrUpdate(defaultTaxon);
1481
        }
1482
    }
1483

    
1484

    
1485

    
1486
    /**
1487
     * @param synonyms: the XML Nodegroup to parse
1488
     * @param nametosave: the list of objects to save into the CDM
1489
     * @param acceptedTaxon: the current acceptedTaxon
1490
     * @param refMods: the current reference extracted from the MODS
1491
     */
1492
    @SuppressWarnings({ "rawtypes" })
1493
    private void extractSynonyms(Node synonymsNode, Taxon acceptedTaxon,Reference refMods, String followingText) {
1494
        logger.info("extractSynonyms");
1495
        //System.out.println("extractSynonyms for: "+acceptedTaxon);
1496
        Taxon ttmp = (Taxon) importer.getTaxonService().find(acceptedTaxon.getUuid());
1497
        if (ttmp != null) {
1498
            acceptedTaxon = CdmBase.deproxy(ttmp,Taxon.class);
1499
        }
1500
        else{
1501
            acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1502
        }
1503
        NodeList children = synonymsNode.getChildNodes();
1504
        List<MyName> names = new ArrayList<MyName>();
1505

    
1506
        if(synonymsNode.getNodeName().equalsIgnoreCase("tax:name")){
1507
            try {
1508
            	MyName myName = extractScientificNameSynonym(synonymsNode, refMods, followingText);
1509
                names.add(myName);
1510
            } catch (TransformerFactoryConfigurationError e) {
1511
                logger.warn(e);
1512
            } catch (TransformerException e) {
1513
                logger.warn(e);
1514
            }
1515
        }
1516

    
1517

    
1518
        for (int i=0;i<children.getLength();i++){
1519
            if (children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1520
                NodeList tmp = children.item(i).getChildNodes();
1521
                //                String fullContent = children.item(i).getTextContent();
1522
                for (int j=0; j< tmp.getLength();j++){
1523
                    if(tmp.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1524
                        try {
1525
                        	MyName myName = extractScientificNameSynonym(tmp.item(j),refMods, followingText);
1526
                            names.add(myName);
1527
                        } catch (TransformerFactoryConfigurationError e) {
1528
                            logger.warn(e);
1529
                        } catch (TransformerException e) {
1530
                            logger.warn(e);
1531
                        }
1532
                    }
1533
                }
1534
            }
1535
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:name")){
1536
                try {
1537
                	MyName myName = extractScientificNameSynonym(children.item(i),refMods, followingText);
1538
                    names.add(myName);
1539
                } catch (TransformerFactoryConfigurationError e) {
1540
                    logger.warn(e);
1541
                } catch (TransformerException e) {
1542
                    logger.warn(e);
1543
                }
1544

    
1545
            }
1546
        }
1547

    
1548
        for(MyName name:names){
1549
        	TaxonNameBase nameToBeFilled = name.getTaxonNameBase();
1550
            Synonym synonym = name.getSyno();
1551
            addFollowingTextToName(nameToBeFilled, followingText);
1552

    
1553
            /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
1554
            nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank());
1555
            if (nameToBeFilled.hasProblem() &&
1556
                    !((nameToBeFilled.getParsingProblems().size()==1) && nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1557
                //            if (nameToBeFilled.hasProblem() && nameToBeFilled.getParsingProblems().contains(ParserProblem.UnparsableNamePart)){
1558
                addProblemNameToFile(name.getName(),"",nomenclaturalCode,name.getRank());
1559
                nameToBeFilled = solveNameProblem(name.getOriginalName(), name.getName(), parser,name.getAuthor(), name.getRank());
1560
            }
1561
            nameToBeFilled = getTaxonNameBase(nameToBeFilled,nametosave,statusType);
1562
             */
1563
            if (!name.getIdentifier().isEmpty() && (name.getIdentifier().length()>2)){
1564
                setLSID(name.getIdentifier(), synonym);
1565
            }
1566

    
1567
            Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1568
            boolean synoExist = false;
1569
            for (Synonym syn: synonymsSet){
1570

    
1571
                boolean a =syn.getName().equals(synonym.getName());
1572
                boolean b = syn.getSec().equals(synonym.getSec());
1573
                if (a && b) {
1574
                    synoExist=true;
1575
                }
1576
            }
1577
            if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1578
                sourceHandler.addSource(refMods, synonym);
1579
                acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1580
            }
1581
        }
1582
        importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1583
    }
1584

    
1585

    
1586
    private boolean addFollowingTextToName(ITaxonNameBase nameToBeFilled, String followingText) {
1587
    	if (nameToBeFilled != null && StringUtils.isNotBlank(followingText)){
1588
    		if (! followingText.matches("\\d\\.?")){
1589

    
1590
	    		if (followingText.startsWith(",")){
1591
	    			followingText = followingText.substring(1).trim();
1592
	    		}
1593
	    		nameToBeFilled.setFullTitleCache(nameToBeFilled.getFullTitleCache()+ "," +followingText , true);
1594
    		}
1595
    		return true;
1596
    	}
1597
    	return false;
1598

    
1599
	}
1600

    
1601
	/**
1602
     * @param refgroup: the XML nodes
1603
     * @param nametosave: the list of objects to save into the CDM
1604
     * @param acceptedTaxon: the current acceptedTaxon
1605
     * @param nametosave: the list of objects to save into the CDM
1606
     * @param refMods: the current reference extracted from the MODS
1607
     * @return the acceptedTaxon (why?)
1608
     * handle cases where the bibref are inside <p> and outside
1609
     */
1610
    @SuppressWarnings({ "rawtypes" })
1611
    private Taxon extractReferences(Node refgroup, List<TaxonNameBase> nametosave, Taxon acceptedTaxon, Reference refMods) {
1612
        logger.info("extractReferences");
1613
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
1614

    
1615
        NodeList children = refgroup.getChildNodes();
1616
        INonViralName nameToBeFilled = getNonViralNameAccNomenclature();
1617

    
1618
        ReferenceBuilder refBuild = new ReferenceBuilder(sourceHandler);
1619
        for (int i=0;i<children.getLength();i++){
1620
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:bibref")){
1621
                String ref = children.item(i).getTextContent().trim();
1622
                refBuild.builReference(ref, treatmentMainName, nomenclaturalCode,  acceptedTaxon, refMods);
1623
                if (!refBuild.isFoundBibref()){
1624
                    extractReferenceRawText(children.item(i).getChildNodes(), nameToBeFilled, refMods, acceptedTaxon);
1625
                }
1626
            }
1627

    
1628
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:p")){
1629
                NodeList references = children.item(i).getChildNodes();
1630
                String descr="";
1631
                for (int j=0;j<references.getLength();j++){
1632
                    if(references.item(j).getNodeName().equalsIgnoreCase("tax:bibref")){
1633
                        String ref = references.item(j).getTextContent().trim();
1634
                        refBuild.builReference(ref, treatmentMainName,  nomenclaturalCode,  acceptedTaxon, refMods);
1635
                    }
1636
                    else
1637
                        if (references.item(j).getNodeName().equalsIgnoreCase("#text")
1638
                                && !references.item(j).getTextContent().trim().isEmpty()){
1639
                            descr += references.item(j).getTextContent().trim();
1640
                        }
1641

    
1642
                }
1643
                if (!refBuild.isFoundBibref()){
1644
                    //if it's not tagged, put it as row information.
1645
                    //                    extractReferenceRawText(references, nameToBeFilled, nametosave, refMods, acceptedTaxon);
1646
                    //then put it as a not markup feature if not empty
1647
                    if (!stringIsEmpty(descr.trim())){
1648
                        Feature currentFeature= getNotMarkedUpFeatureObject();
1649
                        setParticularDescription(descr.trim(),acceptedTaxon,acceptedTaxon, refMods,currentFeature);
1650
                    }
1651
                }
1652
            }
1653
        }
1654
        //        importer.getClassificationService().saveOrUpdate(classification);
1655
        return acceptedTaxon;
1656

    
1657
    }
1658

    
1659
    /**
1660
     * get the non viral name according to the current nomenclature
1661
     * @return
1662
     */
1663

    
1664
    private INonViralName getNonViralNameAccNomenclature() {
1665
    	return nomenclaturalCode.getNewTaxonNameInstance(null);
1666
    }
1667

    
1668
    /**
1669
     * @return the feature object for the category "not marked up"
1670
     */
1671
    private Feature getNotMarkedUpFeatureObject() {
1672
    	// FIXME use getFeature(uuid ....)
1673
        logger.info("getNotMarkedUpFeatureObject");
1674
        Feature currentFeature = (Feature)importer.getTermService().find(NotMarkedUpUUID);
1675
        if (currentFeature == null) {
1676
            currentFeature=Feature.NewInstance(notMarkedUp, notMarkedUp, notMarkedUp);
1677
            currentFeature.setUuid(NotMarkedUpUUID);
1678
            //TODO use userDefined Feature Vocabulary
1679
            Feature.DISTRIBUTION().getVocabulary().addTerm(currentFeature);
1680
//            importer.getTermService().saveOrUpdate(currentFeature);
1681
            importer.getVocabularyService().saveOrUpdate(currentFeature.getVocabulary());
1682
        }
1683
        return currentFeature;
1684
    }
1685

    
1686
    /**
1687
     * @param references
1688
     * handle cases where the bibref are inside <p> and outside
1689
     */
1690
    @SuppressWarnings("rawtypes")
1691
    private void extractReferenceRawText(NodeList references, INonViralName nameToBeFilled, Reference refMods,
1692
            Taxon acceptedTaxon) {
1693
        logger.info("extractReferenceRawText");
1694
        String refString="";
1695
        currentMyName= new MyName(true);
1696
        for (int j=0;j<references.getLength();j++){
1697
            acceptedTaxon=CdmBase.deproxy(acceptedTaxon, Taxon.class);
1698
            //no bibref tag inside
1699
            //            System.out.println("references.item(j).getNodeName()"+references.item(j).getNodeName());
1700
            if (references.item(j).getNodeName().equalsIgnoreCase("tax:name")){
1701

    
1702
                try {
1703
                	String followingText = null;  //needs to be checked if follText is possible
1704
                	//TODO create or not create?
1705
                    currentMyName = extractScientificName(references.item(j), refMods, followingText);
1706
                } catch (TransformerFactoryConfigurationError e) {
1707
                    logger.warn(e);
1708
                } catch (TransformerException e) {
1709
                    logger.warn(e);
1710
                }
1711

    
1712
                //                name=name.trim();
1713
            }
1714
            if (references.item(j).getNodeName().equalsIgnoreCase("#text")){
1715
                refString = references.item(j).getTextContent().trim();
1716
            }
1717
            if(references.item(j).getNodeName().equalsIgnoreCase("#text") && !references.item(j).getTextContent().trim().isEmpty()){
1718
                //
1719
               if (!currentMyName.getStatus().isEmpty()){
1720
            	   String nomNovStatus = this.newNameStatus(currentMyName.getStatus());
1721
	               	if (nomNovStatus != null){
1722
	               		nameToBeFilled.setAppendedPhrase(nomNovStatus);
1723
	               	}else{
1724
	            	   try {
1725
	                        NomenclaturalStatusType  statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1726
                            nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1727
	                    } catch (UnknownCdmTypeException e) {
1728
	                        addProblematicStatusToFile(currentMyName.getStatus());
1729
	                        logger.warn("Problem with status");
1730
	                    }
1731
	               	}
1732
                }
1733

    
1734
                String fullLineRefName = references.item(j).getTextContent().trim();
1735
                int nameOrRefOrOther=2;
1736
                nameOrRefOrOther=askIfNameContained(fullLineRefName);
1737
                if (nameOrRefOrOther==0){
1738
                    TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1739
                    Synonym synonym = Synonym.NewInstance(nameTBF, refMods);
1740

    
1741
                    Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1742
                    //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1743
                    boolean synoExist = false;
1744
                    for (Synonym syn: synonymsSet){
1745
                        //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1746
                        boolean a =syn.getName().equals(synonym.getName());
1747
                        boolean b = syn.getSec().equals(synonym.getSec());
1748
                        if (a && b) {
1749
                            synoExist=true;
1750
                        }
1751
                    }
1752
                    if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1753
                        sourceHandler.addSource(refMods, synonym);
1754

    
1755
                        acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1756
                    }
1757
                }
1758

    
1759
                if (nameOrRefOrOther==1){
1760
                    Reference re = ReferenceFactory.newGeneric();
1761
                    re.setTitleCache(fullLineRefName, true);
1762

    
1763
                    /* TaxonNameBase nameTBF = parser.parseFullName(currentMyName.getName(), nomenclaturalCode, currentMyName.getRank());
1764
                    if (nameTBF.hasProblem() &&
1765
                            !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank)) ) {
1766
                        addProblemNameToFile(currentMyName.getName(),"",nomenclaturalCode,currentMyName.getRank());
1767
                        nameTBF=solveNameProblem(currentMyName.getName(), currentMyName.getName(),parser,currentMyName.getAuthor(), currentMyName.getRank());
1768
                    }
1769
                    nameTBF = getTaxonNameBase(nameTBF,nametosave,statusType);
1770
                     */
1771
                    TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1772
                    Synonym synonym = Synonym.NewInstance(nameTBF, re);
1773

    
1774
                    Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1775
                    //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1776
                    boolean synoExist = false;
1777
                    for (Synonym syn: synonymsSet){
1778
                        //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1779
                        boolean a =syn.getName().equals(synonym.getName());
1780
                        boolean b = syn.getSec().equals(synonym.getSec());
1781
                        if (a && b) {
1782
                            synoExist=true;
1783
                        }
1784
                    }
1785
                    if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1786
                        sourceHandler.addSource(refMods, synonym);
1787

    
1788
                        acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1789
                    }
1790

    
1791
                }
1792

    
1793

    
1794
                if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1795
                    setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1796
                }
1797
            }
1798

    
1799
            if(!currentMyName.getName().isEmpty()){
1800
                //logger.info("acceptedTaxon and name: *"+acceptedTaxon.getTitleCache()+"*, *"+currentMyName.getName()+"*");
1801
                if (acceptedTaxon.getTitleCache().split("sec")[0].trim().equalsIgnoreCase(currentMyName.getName().trim())){
1802
                    Reference refS = ReferenceFactory.newGeneric();
1803
                    refS.setTitleCache(refString, true);
1804
                    //                            TaxonDescription td =importer.getTaxonDescription(acceptedTaxon, false, true);
1805
                    //                            acceptedTaxon.addDescription(td);
1806
                    //                            acceptedTaxon.addSource(refSource);
1807
                    //
1808
                    //                            TextData textData = TextData.NewInstance(Feature.CITATION());
1809
                    //
1810
                    //                            textData.addSource(null, null, refS, null);
1811
                    //                            td.addElement(textData);
1812
                    //                            td.addSource(refSource);
1813
                    //                            importer.getDescriptionService().saveOrUpdate(td);
1814

    
1815

    
1816
                    if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1817
                        setLSID(currentMyName.getIdentifier(), acceptedTaxon);
1818

    
1819
                    }
1820

    
1821
                    acceptedTaxon.getName().setNomenclaturalReference(refS);
1822
                }else{
1823
                    TaxonNameBase nameTBF = currentMyName.getTaxonNameBase();
1824
                    Synonym synonym = null;
1825
                    if (! currentMyName.getStatus().isEmpty()){
1826
                    	String nomNovStatus = this.newNameStatus(currentMyName.getStatus());
1827
                    	if (nomNovStatus != null){
1828
                    		nameToBeFilled.setAppendedPhrase(nomNovStatus);
1829
                    	}else{
1830
	                    	try {
1831
	                            NomenclaturalStatusType statusType = nomStatusString2NomStatus(currentMyName.getStatus());
1832
	                            nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
1833
	                            synonym = Synonym.NewInstance(nameTBF, refMods);
1834
	                        } catch (UnknownCdmTypeException e) {
1835
	                            addProblematicStatusToFile(currentMyName.getStatus());
1836
	                            logger.warn("Problem with status");
1837
	                            synonym = Synonym.NewInstance(nameTBF, refMods);
1838
	                            synonym.setAppendedPhrase(currentMyName.getStatus());
1839
	                        }
1840
                    	}
1841
                    }else{
1842
                        synonym =  Synonym.NewInstance(nameTBF, refMods);
1843
                    }
1844

    
1845

    
1846
                    if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
1847
                        setLSID(currentMyName.getIdentifier(), synonym);
1848
                    }
1849

    
1850
                    Set<Synonym> synonymsSet= acceptedTaxon.getSynonyms();
1851
                    //                    System.out.println(synonym.getName()+" -- "+synonym.getSec());
1852
                    boolean synoExist = false;
1853
                    for (Synonym syn: synonymsSet){
1854
                        //                        System.out.println(syn.getName()+" -- "+syn.getSec());
1855
                        boolean a =syn.getName().equals(synonym.getName());
1856
                        boolean b = syn.getSec().equals(synonym.getSec());
1857
                        if (a && b) {
1858
                            synoExist=true;
1859
                        }
1860
                    }
1861
                    if (!synonymsSet.contains(synonym) && ! (synoExist)) {
1862
                        sourceHandler.addSource(refMods, synonym);
1863

    
1864
                        acceptedTaxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
1865
                    }
1866
                }
1867
            }
1868
            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
1869
        }
1870
    }
1871

    
1872

    
1873

    
1874
    /**
1875
     * @param identifier
1876
     * @param acceptedTaxon
1877
     */
1878
    @SuppressWarnings("rawtypes")
1879
    private void setLSID(String identifier, TaxonBase<?> taxon) {
1880
        //logger.info("setLSID");
1881
        //        boolean lsidok=false;
1882
        String id = identifier.split("__")[0];
1883
        String source = identifier.split("__")[1];
1884
        if (id.indexOf("lsid")>-1){
1885
            try {
1886
                LSID lsid = new LSID(id);
1887
                taxon.setLsid(lsid);
1888
                //                lsidok=true;
1889
            } catch (MalformedLSIDException e) {
1890
                logger.warn("Malformed LSID");
1891
            }
1892

    
1893
        }
1894

    
1895
        //logger.info("search reference for LSID");
1896
        //  if ((id.indexOf("lsid")<0) || !lsidok){
1897
        //ADD ORIGINAL SOURCE ID EVEN IF LSID
1898
        Reference re = null;
1899
        Pager<Reference> references = importer.getReferenceService().findByTitle(Reference.class, source, MatchMode.EXACT, null, 1, null, null, null);
1900
        if( references !=null && references.getCount()>0){
1901
            re=references.getRecords().get(0);
1902
        }
1903
        //logger.info("search reference for LSID-end");
1904
        if(re == null){
1905
            re = ReferenceFactory.newGeneric();
1906
            re.setTitleCache(source, true);
1907
            importer.getReferenceService().saveOrUpdate(re);
1908
        }
1909
        re=CdmBase.deproxy(re, Reference.class);
1910

    
1911
        //logger.info("search source for LSID");
1912
        Set<IdentifiableSource> sources = taxon.getSources();
1913
        boolean lsidinsource=false;
1914
        boolean urlinsource=false;
1915
        for (IdentifiableSource src:sources){
1916
            if (id.equalsIgnoreCase(src.getIdInSource()) && re.getTitleCache().equals(src.getCitation().getTitleCache())) {
1917
                lsidinsource=true;
1918
            }
1919
            if (src.getIdInSource() == null && re.getTitleCache().equals(sourceUrlRef.getTitleCache())) {
1920
                urlinsource=true;
1921
            }
1922
        }
1923
        if(!lsidinsource) {
1924
            taxon.addSource(OriginalSourceType.Import, id,null,re,null);
1925
        }
1926
        if(!urlinsource)
1927
        {
1928
            sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
1929
            taxon.addSource(OriginalSourceType.Import, null,null,sourceUrlRef,null);
1930
            // }
1931
        }
1932

    
1933
    }
1934

    
1935
    /**
1936
     * try to solve a parsing problem for a scientific name
1937
     * @param original : the name from the OCR document
1938
     * @param name : the tagged version
1939
     * @param parser
1940
     * @return the corrected TaxonNameBase
1941
     */
1942
    /*   @SuppressWarnings({ "unchecked", "rawtypes" })
1943
    private TaxonNameBase<?,?> solveNameProblem(String original, String name, INonViralNameParser parser, String author, Rank rank) {
1944
        Map<String,String> ato = namesMap.get(original);
1945
        if (ato == null) {
1946
            ato = namesMap.get(original+" "+author);
1947
        }
1948

    
1949

    
1950
        if (ato == null && rank.equals(Rank.UNKNOWN_RANK())){
1951
            rank=askForRank(original, Rank.UNKNOWN_RANK(), nomenclaturalCode);
1952
        }
1953
        if (ato != null && rank.equals(Rank.UNKNOWN_RANK())){
1954
            rank = getRank(ato);
1955
        }
1956
        //        TaxonNameBase<?,?> nameTBF = parser.parseFullName(name, nomenclaturalCode, rank);
1957
        TaxonNameBase<?,?> nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1958
        //                logger.info("RANK: "+rank);
1959
        int retry=0;
1960
        List<ParserProblem> problems = nameTBF.getParsingProblems();
1961
        for (ParserProblem pb:problems) {
1962
            System.out.println(pb.toString());
1963
        }
1964
        while (nameTBF.hasProblem() && (retry <1) && !((nameTBF.getParsingProblems().size()==1) && nameTBF.getParsingProblems().contains(ParserProblem.CheckRank))){
1965
            addProblemNameToFile(name,author,nomenclaturalCode,rank);
1966
            String fullname=name;
1967
            if(! skippQuestion) {
1968
                fullname =  getFullReference(name,nameTBF.getParsingProblems());
1969
            }
1970
            if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
1971
                nameTBF = TaxonNameFactory.NewBotanicalInstance(null);
1972
            }
1973
            if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
1974
                nameTBF = TaxonNameFactory.NewZoologicalInstance(null);
1975
            }
1976
            if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
1977
                nameTBF= TaxonNameFactory.NewBacterialInstance(null);
1978
            }
1979
            parser.parseReferencedName(nameTBF, fullname, rank, false);
1980
            retry++;
1981
        }
1982
        if (retry == 1){
1983
            if(author != null){
1984
                if (name.indexOf(author)>-1) {
1985
                    nameTBF = parser.parseSimpleName(name.substring(0, name.indexOf(author)), nomenclaturalCode, rank);
1986
                } else {
1987
                    nameTBF = parser.parseSimpleName(name, nomenclaturalCode, rank);
1988
                }
1989
                if (nameTBF.hasProblem()){
1990
                    if (name.indexOf(author)>-1) {
1991
                        addProblemNameToFile(name.substring(0, name.indexOf(author)),author,nomenclaturalCode,rank);
1992
                    } else {
1993
                        addProblemNameToFile(name,author,nomenclaturalCode,rank);
1994
                    }
1995
                    //                    System.out.println("TBF still has problems "+nameTBF.hasProblem());
1996
                    problems = nameTBF.getParsingProblems();
1997
                    for (ParserProblem pb:problems) {
1998
                        System.out.println(pb.toString());
1999
                    }
2000
                    nameTBF.setFullTitleCache(name, true);
2001
                }else{
2002
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)) {
2003
                        ((BotanicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2004
                    }
2005
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)) {
2006
                        ((ZoologicalName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2007
                    }
2008
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)) {
2009
                        ((BacterialName) nameTBF).setAuthorshipCache(currentMyName.getAuthor());
2010
                    }
2011
                }
2012
                //                    logger.info("FULL TITLE CACHE "+name);
2013
            }else{
2014
                nameTBF.setFullTitleCache(name, true);
2015
            }
2016
        }
2017
        return nameTBF;
2018
    }
2019

    
2020
     */
2021

    
2022
    /**
2023
     * @param nomenclatureNode: the XML nodes
2024
     * @param nametosave: the list of objects to save into the CDM
2025
     * @param refMods: the current reference extracted from the MODS
2026
     * @return
2027
     */
2028
    @SuppressWarnings({ "rawtypes" })
2029
    private Taxon extractNomenclature(Node nomenclatureNode,  List<TaxonNameBase> nametosave, Reference refMods) throws ClassCastException{
2030
        refMods=CdmBase.deproxy(refMods, Reference.class);
2031

    
2032
        logger.info("extractNomenclature");
2033
        NodeList children = nomenclatureNode.getChildNodes();
2034
        String freetext="";
2035
        Taxon acceptedTaxon = null;
2036
        //   INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2037

    
2038
        //        String fullContent = nomenclatureNode.getTextContent();
2039

    
2040
        NomenclaturalStatusType statusType = null;
2041
        String newNameStatus = null;
2042
        //TODO
2043
        for (int i=0;i<children.getLength();i++){
2044
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:status")){
2045
                String status = children.item(i).getTextContent().trim();
2046

    
2047
                if (!status.isEmpty()){
2048
                	if (newNameStatus(status) != null){
2049
                		newNameStatus = newNameStatus(status);
2050
                    }else{
2051
	                    try {
2052
	                        statusType = nomStatusString2NomStatus(status);
2053
	                    } catch (UnknownCdmTypeException e) {
2054
	//                    	nomNovStatus;
2055
	                    	addProblematicStatusToFile(status);
2056
	                        logger.warn("Problem with status: " + status);
2057
	                    }
2058
                    }
2059
                }
2060
            }
2061
        }
2062

    
2063
        boolean containsSynonyms=false;
2064
        boolean wasSynonym = false;
2065
        usedFollowingTextPrefix = null;  //reset
2066

    
2067
        for (int i=0; i<children.getLength(); i++){
2068
        	Node childNode = children.item(i);
2069
        	String childName = childNode.getNodeName();
2070

    
2071

    
2072
        	//following text
2073
        	followingText = null;
2074
        	if ( i + 1 < children.getLength()){
2075
            	Node followingTextNode = children.item(i +1);
2076
            	if (followingTextNode.getNodeName().equals("#text") && !followingTextNode.getTextContent().matches("\\s*") ){
2077
            		followingText = followingTextNode.getTextContent();
2078
            	}
2079
        	}
2080

    
2081
        	//traverse nodes
2082
            if (childName.equalsIgnoreCase("#text")) {
2083
                freetext = childNode.getTextContent().trim();
2084
                if (usedFollowingTextPrefix != null && freetext.startsWith(usedFollowingTextPrefix)){
2085
                	freetext = freetext.substring(usedFollowingTextPrefix.length());
2086
                }
2087
                usedFollowingTextPrefix = null;  //reset
2088
            }else if (childName.equalsIgnoreCase("tax:collection_event")) {
2089
                //                System.out.println("COLLECTION EVENT INSIDE NOMENCLATURE");
2090
                extractMaterialsDirect(childNode, acceptedTaxon, refMods, "collection", currentMyName.getTaxonNameBase());
2091
            }else if(childName.equalsIgnoreCase("tax:name")){
2092
                INonViralName nameToBeFilled;
2093
                //System.out.println("HANDLE FIRST NAME OF THE LIST");
2094
                if(!containsSynonyms){
2095
                	wasSynonym = false;
2096

    
2097
                	//System.out.println("I : "+i);
2098
                    currentMyName = new MyName(false);
2099
                    try {
2100
                        currentMyName = extractScientificName(childNode, refMods, followingText);
2101
                        treatmentMainName = currentMyName.getNewName();
2102
                        originalTreatmentName = currentMyName.getOriginalName();
2103

    
2104
                    } catch (TransformerFactoryConfigurationError e1) {
2105
                        throw new RuntimeException(e1);
2106
                    } catch (TransformerException e1) {
2107
                    	throw new RuntimeException(e1);
2108
                    }
2109

    
2110
                    if (currentMyName.getRank().equals(Rank.UNKNOWN_RANK()) || currentMyName.getRank().isLower(state2.getConfig().getMaxRank()) || currentMyName.getRank().equals(state2.getConfig().getMaxRank())){
2111
                        maxRankRespected=true;
2112

    
2113
                        nameToBeFilled=currentMyName.getTaxonNameBase();
2114

    
2115
                        //   acceptedTaxon = importer.getTaxonService().findBestMatchingTaxon(treatmentMainName);
2116
                        acceptedTaxon=currentMyName.getTaxon();
2117
                        //System.out.println("TreatmentName "+treatmentMainName+" - "+acceptedTaxon);
2118

    
2119

    
2120
                        boolean statusMatch=false;
2121
                        if(acceptedTaxon !=null ){
2122
                            acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2123
                            statusMatch=compareStatus(acceptedTaxon, statusType);
2124
                            //System.out.println("statusMatch: "+statusMatch);
2125
                        }
2126
                        if (acceptedTaxon ==null || (acceptedTaxon != null && !statusMatch)){
2127

    
2128
                            nameToBeFilled=currentMyName.getTaxonNameBase();
2129
                            if (nameToBeFilled != null){
2130
                                if (!originalTreatmentName.isEmpty()) {
2131
                                    TaxonNameDescription td = TaxonNameDescription.NewInstance();
2132
                                    td.setTitleCache(originalTreatmentName, true);
2133
                                    nameToBeFilled.addDescription(td);
2134
                                }
2135

    
2136
                                if(statusType != null) {
2137
                                    nameToBeFilled.addStatus(NomenclaturalStatus.NewInstance(statusType));
2138
                                }
2139
                                if(newNameStatus != null){
2140
                                	nameToBeFilled.setAppendedPhrase(newNameStatus);
2141
                                }
2142
                                sourceHandler.addSource(refMods, nameToBeFilled);
2143

    
2144
                                if (nameToBeFilled.getNomenclaturalReference() == null) {
2145
                                    acceptedTaxon= Taxon.NewInstance(nameToBeFilled,refMods);
2146
                                    //System.out.println("NEW ACCEPTED HERE "+nameToBeFilled);
2147
                                }
2148
                                else {
2149
                                    acceptedTaxon= Taxon.NewInstance(nameToBeFilled,(Reference) nameToBeFilled.getNomenclaturalReference() );//TODO TOFIX reference
2150
                                    //System.out.println("NEW ACCEPTED HERE2 "+nameToBeFilled);
2151
                                }
2152

    
2153
                                sourceHandler.addSource(refMods, acceptedTaxon);
2154

    
2155
                                if(!state2.getConfig().doKeepOriginalSecundum()) {
2156
                                    acceptedTaxon.setSec(state2.getConfig().getSecundum());
2157
                                    //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2158
                                    //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2159
                                }
2160

    
2161
                                if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2162
                                    setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2163
                                }
2164

    
2165

    
2166
                                importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2167
                                acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2168
                            }
2169

    
2170
                        }else{
2171
                            acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2172
                            Set<IdentifiableSource> sources = acceptedTaxon.getSources();
2173
                            boolean sourcelinked=false;
2174
                            for (IdentifiableSource source:sources){
2175
                                if (source.getCitation().getTitleCache().equalsIgnoreCase(refMods.getTitleCache())) {
2176
                                    sourcelinked=true;
2177
                                }
2178
                            }
2179
                            if (!state2.getConfig().doKeepOriginalSecundum()) {
2180
                                acceptedTaxon.setSec(state2.getConfig().getSecundum());
2181
                                //logger.info("SET SECUNDUM "+configState.getConfig().getSecundum());
2182
                                //System.out.println("SET SECUNDUM "+configState.getConfig().getSecundum());
2183
                            }
2184
                            importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2185

    
2186
                            if (!sourcelinked){
2187
                                sourceHandler.addSource(refMods, acceptedTaxon);
2188
                            }
2189
                            if (!sourcelinked || !state2.getConfig().doKeepOriginalSecundum()){
2190

    
2191
                                if (!currentMyName.getIdentifier().isEmpty() && (currentMyName.getIdentifier().length()>2)){
2192
                                    //FIXME are these identifiers really related to taxa, not names? Exiting LSIDs come from Zoobank, urn:lsid:biosci.ohio-state.edu:osuc_concepts:134826 (Ants)
2193
                                	setLSID(currentMyName.getIdentifier(), acceptedTaxon);
2194
                                }
2195
                                importer.getTaxonService().saveOrUpdate(acceptedTaxon);
2196
                            }
2197
                        }
2198
                    }else{
2199
                        maxRankRespected=false;
2200
                    }
2201
                    containsSynonyms=true;  //all folowing names are handled as synonyms
2202
                }else{
2203
                    try{
2204
                        extractSynonyms(childNode, acceptedTaxon, refMods, followingText);
2205
                        wasSynonym = true;
2206

    
2207
                    }catch(NullPointerException e){
2208
                        logger.warn("null pointer exception, the accepted taxon might be null");
2209
                    }
2210
                }
2211
                containsSynonyms=true;
2212
            }else if (childName.equalsIgnoreCase("tax:ref_group") && maxRankRespected){
2213
                reloadClassification();
2214
                //extract the References within the document
2215
                extractReferences(childNode,nametosave,acceptedTaxon,refMods);
2216
            }else if (childName.equalsIgnoreCase("tax:bibref")){
2217
            	logger.warn(childName + " still preliminary");
2218

    
2219
            	INonViralName currentName = currentMyName == null ? null : currentMyName.getTaxonNameBase();
2220
            	boolean handled = addFollowingTextToName (currentName, childNode.getTextContent() );
2221
            	if (! handled){
2222
            		setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2223
            	}
2224
            }else{
2225
            	logger.warn(childName + " not yet handled");
2226
            }
2227
            if(!stringIsEmpty(freetext.trim())) {;
2228
                if (! freetext.matches("\\d\\.?")){
2229
                    INonViralName currentName = currentMyName == null ? null : currentMyName.getTaxonNameBase();
2230
                	boolean handled = false;
2231
                	if (currentName != null && !wasSynonym){
2232
                		handled = addFollowingTextToName (currentName, childNode.getTextContent() );
2233
                	}
2234
                	if (! handled){
2235
                		setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject());
2236
                	}
2237
                }
2238

    
2239
                 freetext = "";
2240
            }
2241

    
2242
        }
2243
        //importer.getClassificationService().saveOrUpdate(classification);
2244
        return acceptedTaxon;
2245
    }
2246

    
2247

    
2248

    
2249

    
2250
	/**
2251
     * @return
2252
     */
2253

    
2254
    private boolean compareStatus(TaxonBase<?> t, NomenclaturalStatusType statusType) {
2255
        //logger.info("compareStatus");
2256
        boolean statusMatch=false;
2257
        //found one taxon
2258
        Set<NomenclaturalStatus> status = t.getName().getStatus();
2259
        if (statusType!=null && status.size()>0){ //the statusType is known for both taxon
2260
            for (NomenclaturalStatus st:status){
2261
                NomenclaturalStatusType stype = st.getType();
2262
                if (stype.toString().equalsIgnoreCase(statusType.toString())) {
2263
                    statusMatch=true;
2264
                }
2265
            }
2266
        }
2267
        else{
2268
            if(statusType == null && status.size()==0) {//there is no statusType, we can assume it's the same
2269
                statusMatch=true;
2270
            }
2271
        }
2272
        return statusMatch;
2273
    }
2274

    
2275
    /**
2276
     * @param acceptedTaxon: the current acceptedTaxon
2277
     * @param ref: the current reference extracted from the MODS
2278
     * @return the parent for the current accepted taxon
2279
     */
2280
    /*  private Taxon createParent(Taxon acceptedTaxon, Reference ref) {
2281
        acceptedTaxon = CdmBase.deproxy(acceptedTaxon, Taxon.class);
2282

    
2283
        List<Rank> rankList = new ArrayList<Rank>();
2284
        rankList = importer.getTermService().listByTermClass(Rank.class, null, null, null, null);
2285

    
2286
        List<String> rankListStr = new ArrayList<String>();
2287
        for (Rank r:rankList) {
2288
            rankListStr.add(r.toString());
2289
        }
2290
        String r="";
2291
        String s = acceptedTaxon.getTitleCache();
2292
        Taxon tax = null;
2293
        if(!skippQuestion){
2294
            int addTaxon = askAddParent(s);
2295
            logger.info("ADD TAXON: "+addTaxon);
2296
            if (addTaxon == 0 ){
2297
                Taxon tmp = askParent(acceptedTaxon, classification);
2298
                if (tmp == null){
2299
                    s = askSetParent(s);
2300
                    r = askRank(s,rankListStr);
2301

    
2302
                    TaxonNameBase<?,?> nameToBeFilled = null;
2303
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICNAFP)){
2304
                        nameToBeFilled = TaxonNameFactory.NewBotanicalInstance(null);
2305
                    }
2306
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2307
                        nameToBeFilled = TaxonNameFactory.NewZoologicalInstance(null);
2308
                    }
2309
                    if (nomenclaturalCode.equals(NomenclaturalCode.ICNB)){
2310
                        nameToBeFilled = TaxonNameFactory.NewBacterialInstance(null);
2311
                    }
2312
                    nameToBeFilled.setTitleCache(s, true);
2313
                    nameToBeFilled.setRank(getRank(r), true);
2314

    
2315
                    tax = Taxon.NewInstance(nameToBeFilled, ref);
2316
                }
2317
                else{
2318
                    tax=tmp;
2319
                }
2320

    
2321
                createParent(tax, ref);
2322
                //            logger.info("add parent child "+tax.getTitleCache()+", "+acceptedTaxon.getTitleCache());
2323
                classification.addParentChild(tax, acceptedTaxon, ref, null);
2324
            }
2325
            else{
2326
                classification.addChildTaxon(acceptedTaxon, ref, null);
2327
                tax=acceptedTaxon;
2328
            }
2329
        } else{
2330
            classification.addChildTaxon(acceptedTaxon, ref, null);
2331
            tax=acceptedTaxon;
2332
        }
2333
        //        logger.info("RETURN: "+tax );
2334
        return tax;
2335

    
2336
    }
2337

    
2338
     */
2339

    
2340

    
2341
    private MyName  extractScientificNameSynonym(Node name, Reference refMods, String followingText) throws TransformerFactoryConfigurationError, TransformerException {
2342
        //System.out.println("extractScientificNameSynonym");
2343
        logger.info("extractScientificNameSynonym");
2344
        String[] rankListToPrint_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2345
        List<String> rankListToPrint = new ArrayList<String>();
2346
        for (String r : rankListToPrint_tmp) {
2347
            rankListToPrint.add(r.toLowerCase());
2348
        }
2349

    
2350
        Rank rank = Rank.UNKNOWN_RANK();
2351
        NodeList children = name.getChildNodes();
2352
        String originalName="";
2353
        String fullName = "";
2354
        String newName="";
2355
        String identifier="";
2356
        HashMap<String, String> atomisedMap = new HashMap<String, String>();
2357
        List<String> atomisedName= new ArrayList<String>();
2358

    
2359
        String rankStr = "";
2360
        Rank tmpRank ;
2361

    
2362
        String status= extractStatus(children);
2363

    
2364
        for (int i=0;i<children.getLength();i++){
2365
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:xmldata")){
2366
                NodeList atom = children.item(i).getChildNodes();
2367
                for (int k=0;k<atom.getLength();k++){
2368
                    identifier = extractIdentifier(identifier, atom.item(k));
2369
                    tmpRank = null;
2370
                    rankStr = atom.item(k).getNodeName().toLowerCase();
2371
                    //                    logger.info("RANKSTR:*"+rankStr+"*");
2372
                    if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2373
                        rankStr=atom.item(k).getTextContent().trim();
2374
                        tmpRank = getRank(rankStr);
2375
                    }
2376
                    //                    if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2377
                    if (tmpRank != null){
2378
                        rank=tmpRank;
2379
                    }
2380
                    atomisedMap.put(rankStr.toLowerCase(),atom.item(k).getTextContent().trim());
2381
                }
2382
                addAtomisedNamesToMap(rankListToPrint, rank, atomisedName, atom);
2383
            }
2384
            if(children.item(i).getNodeName().equalsIgnoreCase("#text") && !StringUtils.isBlank(children.item(i).getTextContent())){
2385
                //                logger.info("name non atomised: "+children.item(i).getTextContent());
2386
                fullName = children.item(i).getTextContent().trim();
2387
                //                logger.info("fullname: "+fullName);
2388
            }
2389
        }
2390
        originalName=fullName;
2391
        fullName = cleanName(fullName, atomisedName);
2392
        namesMap.put(fullName,atomisedMap);
2393

    
2394
        String atomisedNameStr = getAtomisedNameStr(atomisedName);
2395

    
2396
        if (fullName != null){
2397
            //            System.out.println("fullname: "+fullName);
2398
            //            System.out.println("atomised: "+atomisedNameStr);
2399
            if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2400
                if (skippQuestion){
2401
                    //                    String defaultN = "";
2402
                    if (atomisedNameStr.length()>fullName.length()) {
2403
                        newName=atomisedNameStr;
2404
                    } else {
2405
                        if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2406
                            newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2407
                        } else {
2408
                            newName=fullName;
2409
                        }
2410
                    }
2411
                } else {
2412
                    newName=askWhichScientificName(fullName,atomisedNameStr,classification.getTitleCache(),name);
2413
                }
2414
            } else {
2415
                newName=fullName;
2416
            }
2417
        }
2418
        //not really needed
2419
        //        rank = askForRank(newName, rank, nomenclaturalCode);
2420
        //        System.out.println("atomised: "+atomisedMap.toString());
2421

    
2422
        //        String[] names = new String[5];
2423
        MyName myname = new MyName(true);
2424

    
2425
        //System.out.println("Handle "+newName+ "(rank: "+rank+")");
2426
        //        System.out.println(atomisedMap.keySet());
2427
        fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2428
        myname.setOriginalName(fullName);
2429
        myname.setNewName(newName);
2430
        myname.setRank(rank);
2431
        myname.setIdentifier(identifier);
2432
        myname.setStatus(status);
2433
        myname.setSource(refMods);
2434

    
2435
        //        boolean higherAdded=false;
2436

    
2437

    
2438
        boolean parseNameManually=false;
2439
        INonViralNameParser<?> parser = NonViralNameParserImpl.NewInstance();
2440
        ITaxonNameBase  nameToBeFilledTest ;
2441

    
2442
        //if selected the atomised version
2443
        if(newName==atomisedNameStr){
2444
            nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2445
            if (nameToBeFilledTest.hasProblem()){
2446
                addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2447
                nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode, rank);
2448
                if (nameToBeFilledTest.hasProblem()){
2449
                    addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2450
                    parseNameManually=true;
2451
                }
2452
            }
2453
        }else{
2454
            nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2455
            if (nameToBeFilledTest.hasProblem()){
2456
                addProblemNameToFile("fullversion",fullName, nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2457
                nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2458
                parseNameManually=true;
2459
                if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2460
                    addNameDifferenceToFile(originalName,atomisedNameStr);
2461
                }
2462
            }
2463
        }
2464

    
2465
        if(parseNameManually){
2466
            //System.out.println("DO IT MANUALLY");
2467
        	if (this.state2.getConfig().isUseOldUnparsedSynonymExtraction()){
2468
                createUnparsedSynonym(rank, newName, atomisedMap, myname);
2469
        	}else{
2470
        		createUnparsedSynonymNew(rank, newName, atomisedMap, myname, refMods);;
2471
        	}
2472
        } else{
2473
            //System.out.println("AUTOMATIC!");
2474
            //            createAtomisedTaxonString(newName, atomisedMap, myname);
2475
            myname.setParsedName(nameToBeFilledTest);
2476
            myname.buildTaxon();
2477
        }
2478
        //System.out.println("RETURN SYNONYM "+myname.getSyno().toString());
2479
        return myname;
2480
    }
2481

    
2482

    
2483
	/**
2484
     * @param name
2485
     * @throws TransformerFactoryConfigurationError
2486
     * @throws TransformerException
2487
     * @return a list of possible names
2488
     */
2489
    @SuppressWarnings({"rawtypes" })
2490
    private MyName extractScientificName(Node name, Reference refMods, String followingText) throws TransformerFactoryConfigurationError, TransformerException {
2491
        logger.info("extractScientificName");
2492

    
2493
        String[] rankListToPrintLowerCase_tmp ={"dwc:genus","dwc:specificepithet","dwc:species","dwc:subspecies", "dwc:infraspecificepithet","dwc:scientificnameauthorship"};
2494
        List<String> rankListToPrint = Arrays.asList(rankListToPrintLowerCase_tmp);
2495

    
2496
        Rank rank = Rank.UNKNOWN_RANK();
2497
        NodeList children = name.getChildNodes();
2498
        String originalName = "";
2499
        String fullName = "";
2500
        String newName = "";
2501
        String identifier = "";
2502
        HashMap<String, String> atomisedMap = new HashMap<String, String>();
2503
        List<String> atomisedNameList= new ArrayList<String>();
2504

    
2505
        String status= extractStatus(children);
2506

    
2507
        for (int i=0;i<children.getLength();i++){
2508
        	Node nameChild = children.item(i);
2509
            if(nameChild.getNodeName().equalsIgnoreCase("tax:xmldata")){
2510
                NodeList xmlDataChildren = nameChild.getChildNodes();
2511
                for (int k=0;k<xmlDataChildren.getLength();k++){
2512
                	Node xmlDataChild = xmlDataChildren.item(k);
2513
                    identifier = extractIdentifier(identifier, xmlDataChild);
2514
                    String rankStr = xmlDataChild.getNodeName().toLowerCase();
2515
                    if (rankStr.equalsIgnoreCase("dwc:taxonRank")) {
2516
                        rankStr=xmlDataChild.getTextContent().trim();
2517
                        Rank tmpRank = getRank(rankStr);
2518
                        if (tmpRank != null){
2519
                            rank=tmpRank;
2520
                        }
2521
                    }
2522
                    //                    if ((tmpRank != null) && (tmpRank.isLower(rank) || rank.equals(Rank.UNKNOWN_RANK()))) {
2523

    
2524
                    atomisedMap.put(rankStr.toLowerCase(),xmlDataChild.getTextContent().trim());
2525
                }
2526
                addAtomisedNamesToMap(rankListToPrint, rank, atomisedNameList, xmlDataChildren);
2527
            }
2528
            else if(nameChild.getNodeName().equalsIgnoreCase("#text") && ! nameChild.getTextContent().matches("\\s*")){
2529
                //                logger.info("name non atomised: "+children.item(i).getTextContent());
2530
                fullName = nameChild.getTextContent().trim();
2531
                //                logger.info("fullname: "+fullName);
2532
            }
2533
        }
2534
        originalName=fullName;
2535
        fullName = cleanName(fullName, atomisedNameList);
2536
        namesMap.put(fullName,atomisedMap);
2537

    
2538
        String atomisedNameStr = getAtomisedNameStr(atomisedNameList);
2539

    
2540
        if (fullName != null){
2541
            if (!fullName.equalsIgnoreCase(atomisedNameStr)) {
2542
                if (skippQuestion){
2543
                    if (atomisedNameStr.length()>fullName.length()) {
2544
                        newName = atomisedNameStr;
2545
                    } else {
2546
                        if (fullName.length()>atomisedNameStr.length() && (rank.isLower(Rank.SPECIES()) && fullName.length()>2 && !fullName.substring(0, 1).equals("."))) {
2547
                            newName = askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
2548
                        } else {
2549
                            newName = fullName;
2550
                        }
2551
                    }
2552
                } else {
2553
                    newName=askWhichScientificName(fullName, atomisedNameStr, classification.getTitleCache(), name);
2554
                }
2555
            } else {
2556
                newName=fullName;
2557
            }
2558
        }
2559
        //not really needed
2560
        //        rank = askForRank(newName, rank, nomenclaturalCode);
2561
        //        System.out.println("atomised: "+atomisedMap.toString());
2562

    
2563
        //        String[] names = new String[5];
2564
        MyName myname = new MyName(false);
2565

    
2566
        //System.out.println("\n\nBUILD "+newName+ "(rank: "+rank+")");
2567
        //        System.out.println(atomisedMap.keySet());
2568
        fullName = extractAuthorFromNames(rank, fullName, atomisedMap, myname);
2569
        myname.setOriginalName(fullName);
2570
        myname.setNewName(newName);
2571

    
2572
        myname.setRank(rank);
2573
        myname.setIdentifier(identifier);
2574
        myname.setStatus(status);
2575
        myname.setSource(refMods);
2576

    
2577
        //        boolean higherAdded=false;
2578

    
2579

    
2580
        boolean parseNameManually=false;
2581
        INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
2582
        ITaxonNameBase  nameToBeFilledTest = null;
2583

    
2584
        //if selected the atomised version
2585
        if(newName==atomisedNameStr){
2586
            nameToBeFilledTest = parseWithExtension(parser, atomisedNameStr, rank, followingText, atomisedMap);
2587
            if (nameToBeFilledTest.hasProblem()){
2588
        	    addProblemNameToFile("ato",atomisedNameStr,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2589
                nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2590
                if (nameToBeFilledTest.hasProblem()){
2591
                    addProblemNameToFile("full",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2592
                    parseNameManually=true;
2593
                }
2594
            }
2595
        }else{
2596
            nameToBeFilledTest = parseWithExtension(parser, fullName , rank, followingText, atomisedMap);
2597
            if (nameToBeFilledTest.hasProblem()){
2598
                addProblemNameToFile("fullversion",fullName,nomenclaturalCode,rank, nameToBeFilledTest.getParsingProblems().toString());
2599
                nameToBeFilledTest = parser.parseFullName(fullName, nomenclaturalCode,rank);
2600
                parseNameManually=true;
2601
                if(!originalName.equalsIgnoreCase(atomisedNameStr)) {
2602
                    addNameDifferenceToFile(originalName,atomisedNameStr);
2603
                }
2604
            }
2605
        }
2606

    
2607
        //System.out.println("parseNameManually: "+parseNameManually);
2608
        if(parseNameManually){
2609
            createAtomisedTaxon(rank, newName, atomisedMap, myname);
2610
        }
2611
        else{
2612
            createAtomisedTaxonString(newName, atomisedMap, myname);
2613
            myname.setParsedName(nameToBeFilledTest);
2614
            //TODO correct handling of createIfNotExists
2615
           	myname.buildTaxon();
2616
        }
2617
        return myname;
2618

    
2619
    }
2620

    
2621
    private ITaxonNameBase parseWithExtension(INonViralNameParser parser, String atomisedNameStr, Rank rank, String followingText, HashMap<String, String> atomisedMap) {
2622
    	Object[] nameExtensionResult = getPossibleExtension(followingText, atomisedMap, nomenclaturalCode);
2623

    
2624
    	TaxonNameBase name = (TaxonNameBase)parser.parseFullName(atomisedNameStr, nomenclaturalCode, rank);
2625
    	if (nameExtensionResult != null && nameExtensionResult[0] != null){
2626
    		String ext = (String)nameExtensionResult[0];
2627
    		TaxonNameBase extName = (TaxonNameBase)parser.parseFullName(atomisedNameStr + " " + ext, nomenclaturalCode, rank);
2628
    		if (! extName.hasProblem()){
2629
    			name = extName;
2630
    			this.usedFollowingTextPrefix = ext;
2631
    			//TODO do we need to fill the atomisedMap at all?
2632
    			if ((Boolean)(nameExtensionResult[1])){
2633
    				//TODO
2634
    			}
2635
    			if ((Boolean)(nameExtensionResult[2])){
2636
    				//TODO BasionymYear etc.
2637
    				Integer origYear = name.getPublicationYear();
2638
    				if (origYear != null){
2639
        				atomisedMap.put(PUBLICATION_YEAR, origYear.toString());
2640
    				}
2641
    			}
2642
    		}
2643
    	}
2644
		return name;
2645
	}
2646

    
2647
	private Object[] getPossibleExtension(String followingText, HashMap<String, String> atomisedMap, NomenclaturalCode nomenclaturalCode) {
2648
		if (StringUtils.isBlank(followingText)){
2649
			return null;
2650
		}
2651

    
2652
    	boolean includeAuthor = true;
2653
    	boolean includeYear = false;
2654
		if (atomisedMap.containsKey("dwc:scientificnameauthorship")){
2655
			includeAuthor = false;
2656
		}
2657
    	if (nomenclaturalCode.equals(NomenclaturalCode.ICZN)){
2658
    		includeYear = true;
2659
    	}
2660
    	String patternStr = "";
2661
    	if (includeAuthor){
2662
    		patternStr += NonViralNameParserImplRegExBase.capitalWord;
2663
    	}
2664
    	if (includeYear){
2665
    		patternStr += "\\s*(,|\\s+)\\s*" + "(17|18|19|20)" + "\\d{2}" ;
2666
    	}
2667
    	String match = null;
2668
    	if (! patternStr.isEmpty()){
2669
    		Pattern pattern = Pattern.compile("^" + patternStr);
2670
    		Matcher matcher = pattern.matcher(followingText.trim());
2671
    		if (matcher.find()){
2672
    			match = matcher.group();
2673
    		}
2674
    	}
2675

    
2676
		return new Object[]{match, includeAuthor, includeYear};
2677
	}
2678

    
2679
	/**
2680
     * @param atomisedName
2681
     * @return
2682
     */
2683
    private String getAtomisedNameStr(List<String> atomisedName) {
2684
        //logger.info("getAtomisedNameStr");
2685
        String atomisedNameStr = StringUtils.join(atomisedName," ");
2686
        while(atomisedNameStr.contains("  ")) {
2687
            atomisedNameStr=atomisedNameStr.replace("  ", " ");
2688
        }
2689
        atomisedNameStr=atomisedNameStr.trim();
2690
        return atomisedNameStr;
2691
    }
2692

    
2693
    /**
2694
     * @param children
2695
     * @param status
2696
     * @return
2697
     */
2698
    private String extractStatus(NodeList children) {
2699
        logger.info("extractStatus");
2700
        String status="";
2701
        for (int i=0;i<children.getLength();i++){
2702
            if(children.item(i).getNodeName().equalsIgnoreCase("tax:status") ||
2703
                    (children.item(i).getNodeName().equalsIgnoreCase("tax:namePart") &&
2704
                            children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("status"))){
2705
                status = children.item(i).getTextContent().trim();
2706
            }
2707
        }
2708
        return status;
2709
    }
2710

    
2711
    /**
2712
     * @param identifier
2713
     * @param atom
2714
     * @param k
2715
     * @return
2716
     */
2717
    private String extractIdentifier(String identifier, Node atom) {
2718
        //logger.info("extractIdentifier");
2719
        if (atom.getNodeName().equalsIgnoreCase("tax:xid")){
2720
            try{
2721
                identifier = atom.getAttributes().getNamedItem("identifier").getNodeValue();
2722
            }catch(Exception e){
2723
                System.out.println("pb with identifier, maybe empty");
2724
            }
2725
            try{
2726
                identifier+="__"+atom.getAttributes().getNamedItem("source").getNodeValue();
2727
            }catch(Exception e){
2728
                System.out.println("pb with identifier, maybe empty");
2729
            }
2730
        }
2731
        return identifier;
2732
    }
2733

    
2734
    /**
2735
     * @param rankListToPrint
2736
     * @param rank
2737
     * @param atomisedName
2738
     * @param atom
2739
     */
2740
    private void addAtomisedNamesToMap(List<String> rankListToPrint, Rank rank, List<String> atomisedName, NodeList atom) {
2741
        logger.info("addAtomisedNamesToMap");
2742
        for (int k=0;k<atom.getLength();k++){
2743
        	Node node = atom.item(k);
2744
        	String nodeName = node.getNodeName();
2745
            if (! nodeName.equalsIgnoreCase("dwc:taxonRank") ) {  //rank has been handled in higher method
2746
                if (nodeName.equalsIgnoreCase("dwc:subgenus") || nodeName.equalsIgnoreCase("dwcranks:subgenus")) {
2747
                    atomisedName.add("("+ node.getTextContent().trim()+")");
2748
                } else if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet") || nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2749
                       	if(nodeName.equalsIgnoreCase("dwcranks:varietyepithet")){
2750
                            atomisedName.add("var. "+node.getTextContent().trim());
2751
                        }else if(nodeName.equalsIgnoreCase("dwc:Subspecies") || nodeName.equalsIgnoreCase("dwc:infraspecificepithet")) {
2752
                            atomisedName.add("subsp. "+atom.item(k).getTextContent().trim());
2753
                        }
2754
                } else if(rankListToPrint.contains(nodeName.toLowerCase())) {
2755
                    atomisedName.add(node.getTextContent().trim());
2756
                } else{
2757
                    if (rank.isHigher(Rank.GENUS()) && (nodeName.indexOf("dwcranks:")>-1 || nodeName.indexOf("dwc:Family")>-1)) {
2758
                        atomisedName.add(node.getTextContent().trim());
2759
                    }else if (nodeName.equals("#text")){
2760
                    	String text = node.getTextContent();
2761
                    	if (StringUtils.isNotBlank(text)){
2762
                    		//TODO handle text
2763
                    		logger.warn("name xmldata contains text. This is unhandled");
2764
                    	}
2765
                    }else if (nodeName.matches("(?i)(dwc:Kingdom|dwc:Class|dwc:Order|dwc:Family)")){
2766
                    	//we currently do not use higher ranks information
2767
                    }else{
2768
                    	//TODO handle unhandled node
2769
                    	logger.warn("Unhandled node: " + nodeName);
2770
                    }
2771
                }
2772
            }
2773
        }
2774
    }
2775

    
2776
    /**
2777
     * @param fullName
2778
     * @param atomisedName
2779
     * @return
2780
     */
2781
    private String cleanName(String name, List<String> atomisedName) {
2782
        //logger.info("cleanName");
2783
        String fullName =name;
2784
        if (fullName != null){
2785
            fullName = fullName.replace("( ", "(");
2786
            fullName = fullName.replace(" )",")");
2787

    
2788
            if (fullName.trim().isEmpty()){
2789
                fullName=StringUtils.join(atomisedName," ");
2790
            }
2791

    
2792
            while(fullName.contains("  ")) {
2793
                fullName=fullName.replace("  ", " ");
2794
                //            logger.info("while");
2795
            }
2796
            fullName=fullName.trim();
2797
        }
2798
        return fullName;
2799
    }
2800

    
2801
    /**
2802
     * @param rank
2803
     * @param fullName
2804
     * @param atomisedMap
2805
     * @param myname
2806
     * @return
2807
     */
2808
    private String extractAuthorFromNames(Rank rank, String name, HashMap<String, String> atomisedMap, MyName myname) {
2809
        logger.info("extractAuthorFromNames");
2810
        String fullName=name;
2811
        if (atomisedMap.get("dwc:scientificnameauthorship") == null && fullName!=null){
2812
            //            System.out.println("rank : "+rank.toString());
2813
            if(rank.isHigher(Rank.SPECIES())){
2814
                try{
2815
                    String author=null;
2816
                    if(atomisedMap.get("dwcranks:subgenus") != null) {
2817
                        author = fullName.split(atomisedMap.get("dwcranks:subgenus"))[1].trim();
2818
                    }
2819
                    if(atomisedMap.get("dwc:subgenus") != null) {
2820
                        author = fullName.split(atomisedMap.get("dwc:subgenus"))[1].trim();
2821
                    }
2822
                    if(author == null) {
2823
                        if(atomisedMap.get("dwc:genus") != null) {
2824
                            author = fullName.split(atomisedMap.get("dwc:genus"))[1].trim();
2825
                        }
2826
                    }
2827
                    if(author != null){
2828
                        fullName = fullName.substring(0, fullName.indexOf(author));
2829
                        author=author.replaceAll(",","").trim();
2830
                        myname.setAuthor(author);
2831
                    }
2832
                }catch(Exception e){
2833
                    //could not extract the author
2834
                }
2835
            }
2836
            if(rank.equals(Rank.SPECIES())){
2837
                try{
2838
                    String author=null;
2839
                    if(author == null) {
2840
                        if(atomisedMap.get("dwc:species") != null) {
2841
                            String[] t = fullName.split(atomisedMap.get("dwc:species"));
2842
                            //                            System.out.println("NB ELEMENTS "+t.length +"fullName "+fullName+", "+atomisedMap.get("dwc:species"));
2843
                            author = fullName.split(atomisedMap.get("dwc:species"))[1].trim();
2844
                            //                            System.out.println("AUTEUR "+author);
2845
                        }
2846
                    }
2847
                    if(author != null){
2848
                        fullName = fullName.substring(0, fullName.indexOf(author));
2849
                        author=author.replaceAll(",","").trim();
2850
                        myname.setAuthor(author);
2851
                    }
2852
                }catch(Exception e){
2853
                    //could not extract the author
2854
                }
2855
            }
2856
        }else{
2857
            myname.setAuthor(atomisedMap.get("dwc:scientificnameauthorship"));
2858
        }
2859
        return fullName;
2860
    }
2861

    
2862
    /**
2863
     * @param newName
2864
     * @param atomisedMap
2865
     * @param myname
2866
     */
2867
    private void createAtomisedTaxonString(String newName, HashMap<String, String> atomisedMap, MyName myname) {
2868
        logger.info("createAtomisedTaxonString "+atomisedMap);
2869
        if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
2870
            myname.setFamilyStr(atomisedMap.get("dwc:family"));
2871
        }
2872
        if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY())){
2873
            myname.setSubfamilyStr(atomisedMap.get("dwcranks:subfamily"));
2874
        }
2875
        if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
2876
            myname.setTribeStr(atomisedMap.get("dwcranks:tribe"));
2877
        }
2878
        if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
2879
            myname.setSubtribeStr(atomisedMap.get("dwcranks:subtribe"));
2880
        }
2881
        if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
2882
            myname.setGenusStr(atomisedMap.get("dwc:genus"));
2883
        }
2884
        if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2885
            myname.setSubgenusStr(atomisedMap.get("dwcranks:subgenus"));
2886
        }
2887
        if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
2888
            myname.setSubgenusStr(atomisedMap.get("dwc:subgenus"));
2889
        }
2890
        if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
2891
            String n=newName;
2892
            if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2893
                n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2894
                n=n.replace("subsp.","");
2895
            }
2896
            if(atomisedMap.get("dwc:subspecies") != null) {
2897
                n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2898
                n=n.replace("subsp.","");
2899
            }
2900
            if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2901
                n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2902
                n=n.replace("var.","");
2903
                n=n.replace("v.","");
2904
            }
2905
            if(atomisedMap.get("dwcranks:formepithet") != null) {
2906
                //TODO
2907
                System.out.println("TODO FORMA");
2908
                n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
2909
                n=n.replace("forma","");
2910
            }
2911
            n=n.trim();
2912
            String author = myname.getAuthor();
2913
            if(n.split(" ").length>2){
2914

    
2915
                String n2=n.split(" ")[0]+" "+n.split(" ")[1];
2916
                String a= "";
2917
                try{
2918
                    a=n.split(n2)[1].trim();
2919
                }catch(Exception e){
2920
                    logger.info("no author in "+n+"?");}
2921

    
2922
                myname.setAuthor(a);
2923
                //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
2924
                n=n2;
2925

    
2926
            }
2927

    
2928
            myname.setSpeciesStr(atomisedMap.get("dwc:species"));
2929
            myname.setAuthor(author);
2930
        }
2931
        if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
2932
            myname.setSubspeciesStr(atomisedMap.get("dwc:subspecies"));
2933
        }
2934
        if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
2935
            myname.setSubspeciesStr(atomisedMap.get("dwc:infraspecificepithet"));
2936
        }
2937
        if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
2938
            myname.setVarietyStr(atomisedMap.get("dwcranks:varietyepithet"));
2939
        }
2940
        if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
2941
            myname.setFormStr(atomisedMap.get("dwcranks:formepithet"));
2942
        }
2943
        if (atomisedMap.get(PUBLICATION_YEAR) != null){
2944
        	myname.setPublicationYear(Integer.valueOf(atomisedMap.get(PUBLICATION_YEAR)));
2945
        }
2946
    }
2947

    
2948
    /**
2949
     * @see #createUnparsedSynonymNew(Rank, String, HashMap, MyName)
2950
     * @param rank
2951
     * @param newName
2952
     * @param atomisedMap
2953
     * @param myname
2954
     */
2955
    private void createUnparsedSynonym(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
2956
        logger.info("createSynonym");
2957
        //System.out.println("createsynonym");
2958
        if(rank.equals(Rank.UNKNOWN_RANK())){
2959
            myname.setNotParsableTaxon(newName);
2960
        }else{
2961
	        if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY()) && rank.equals(Rank.FAMILY())){
2962
	            myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
2963
	        }
2964
	        if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY()) && rank.equals(Rank.SUBFAMILY())){
2965
	            myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
2966
	        }
2967
	        if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE()) && rank.equals(Rank.TRIBE())){
2968
	            myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
2969
	        }
2970
	        if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE()) && rank.equals(Rank.SUBTRIBE())){
2971
	            myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
2972
	        }
2973
	        if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS()) && rank.equals(Rank.GENUS())){
2974
	            myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
2975
	        }
2976
	        if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
2977
	            myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
2978
	        }
2979
	        if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS()) && rank.equals(Rank.SUBGENUS())){
2980
	            myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
2981
	        }
2982
	        if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES()) && rank.equals(Rank.SPECIES())){
2983
	            String n=newName;
2984
	            if(atomisedMap.get("dwc:infraspecificepithet") != null) {
2985
	                n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
2986
	                n=n.replace("subsp.","");
2987
	            }
2988
	            if(atomisedMap.get("dwc:subspecies") != null) {
2989
	                n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
2990
	                n=n.replace("subsp.","");
2991
	            }
2992
	            if(atomisedMap.get("dwcranks:varietyepithet") != null) {
2993
	                n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
2994
	                n=n.replace("var.","");
2995
	                n=n.replace("v.","");
2996
	            }
2997
	            if(atomisedMap.get("dwcranks:formepithet") != null) {
2998
	                //TODO
2999
	                //System.out.println("TODO FORMA");
3000
	                n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3001
	                n=n.replace("forma","");
3002
	            }
3003
	            n=n.trim();
3004
	            String author = myname.getAuthor();
3005
	            if(n.split(" ").length>2){
3006

    
3007
	                String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3008
	                String a="";
3009
	                try{
3010
	                    a= n.split(n2)[1].trim();
3011
	                }catch(Exception e){logger.info("no author in "+n);}
3012
	                myname.setAuthor(a);
3013
	                //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3014
	                n=n2;
3015

    
3016
	            }
3017
	            Taxon species = myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank);
3018
	            myname.setSpecies(species);
3019
	            myname.setAuthor(author);
3020
	        }
3021
	        if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3022
	            myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3023
	        }
3024
	        if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES()) && rank.equals(Rank.SUBSPECIES())){
3025
	            myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3026
	        }
3027
	        if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY()) && rank.equals(Rank.VARIETY())){
3028
	            myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3029
	        }
3030
	        if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM()) && rank.equals(Rank.FORM())){
3031
	            myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3032
	        }
3033
        }
3034

    
3035
    }
3036

    
3037

    
3038
    /**
3039
     * @param refMods
3040
     * @see #createUnparsedSynonym(Rank, String, HashMap, MyName)
3041
     * the original TaxonXImport extracted Synonyms by creating acc Taxa with partial names
3042
     * I (AM) do not understand this but don't want to destroy code which maybe works in some cases) there
3043
     * I created this switch for old
3044
     * for Spiders the new version is preferred
3045
     */
3046
    private void createUnparsedSynonymNew(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname, Reference refMods) {
3047
        logger.info("createSynonym");
3048

    
3049
        INonViralName nameToBeFilled = this.getNonViralNameAccNomenclature();
3050
        //System.out.println("createsynonym");
3051
        if(rank.equals(Rank.UNKNOWN_RANK())){
3052
            //TODO
3053
        	myname.setNotParsableTaxon(newName);
3054

    
3055
        	nameToBeFilled.setTitleCache(newName, true);
3056
        }else{
3057
        	if(atomisedMap.get("dwc:genus") != null ){
3058
    			nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwc:genus"));
3059
	        }
3060
        	if (rank.isSupraGeneric()){
3061
        		if (atomisedMap.get("dwcranks:subtribe") != null ){
3062
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subtribe"));
3063
    	        }else if (atomisedMap.get("dwcranks:subtribe") != null ){
3064
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subtribe"));
3065
    	        }else if (atomisedMap.get("dwcranks:tribe") != null ){
3066
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:tribe"));
3067
    	        }else if (atomisedMap.get("dwcranks:subfamily") != null ){
3068
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwcranks:subfamily"));
3069
    	        }else if (atomisedMap.get("dwc:family") != null ){
3070
    	        	nameToBeFilled.setGenusOrUninomial(atomisedMap.get("dwc:family"));
3071
        	    }else{
3072
        	    	logger.warn("Supra generic rank not yet handled or atomisation not available");
3073
        	    }
3074
        	}
3075
        	if (atomisedMap.get("dwcranks:subgenus") != null){
3076
        		nameToBeFilled.setInfraGenericEpithet(atomisedMap.get("dwcranks:subgenus"));
3077
        	}
3078
        	if (atomisedMap.get("dwc:subgenus") != null){
3079
        		nameToBeFilled.setInfraGenericEpithet(atomisedMap.get("dwc:subgenus"));
3080
        	}
3081
        	if (atomisedMap.get("dwc:species") != null){
3082
        		nameToBeFilled.setSpecificEpithet(atomisedMap.get("dwc:species"));
3083
        	}
3084
        	if (atomisedMap.get("dwcranks:formepithet") != null){
3085
        		nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwcranks:formepithet"));
3086
        	}else if (atomisedMap.get("dwcranks:varietyepithet") != null){
3087
        		nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwcranks:varietyepithet"));
3088
        	}else if (atomisedMap.get("dwc:infraspecificepithet") != null){
3089
        		nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwc:infraspecificepithet"));
3090
        	}else if (atomisedMap.get("dwc:subspecies") != null){
3091
        		nameToBeFilled.setInfraSpecificEpithet(atomisedMap.get("dwc:subspecies"));
3092
        	}
3093
            Reference sec = sourceUrlRef;
3094
            if(!state2.getConfig().doKeepOriginalSecundum()){
3095
                sec = state2.getConfig().getSecundum();
3096
            }
3097
        	Synonym syn = Synonym.NewInstance(nameToBeFilled, sec);
3098
//        	sourceHandler.addSource(refMods, syn);
3099
        	myname.setSyno(syn);
3100
        	myname.setSynonym(true);
3101
        }
3102
	}
3103

    
3104
    /**
3105
     * @param rank
3106
     * @param newName
3107
     * @param atomisedMap
3108
     * @param myname
3109
     */
3110
    private void createAtomisedTaxon(Rank rank, String newName, HashMap<String, String> atomisedMap, MyName myname) {
3111
        logger.info("createAtomisedTaxon "+atomisedMap);
3112
        if(rank.equals(Rank.UNKNOWN_RANK())){
3113
            myname.setNotParsableTaxon(newName);
3114
        }
3115
        else{
3116
            if(atomisedMap.get("dwc:family") != null && checkRankValidForImport(Rank.FAMILY())){
3117
                myname.setFamily(myname.findOrCreateTaxon(atomisedMap.get("dwc:family"),newName, Rank.FAMILY(),rank));
3118
            }
3119
            if(atomisedMap.get("dwcranks:subfamily") != null  && checkRankValidForImport(Rank.SUBFAMILY())){
3120
                myname.setSubfamily(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subfamily"), newName,Rank.SUBFAMILY(),rank));
3121
            }
3122
            if(atomisedMap.get("dwcranks:tribe") != null && checkRankValidForImport(Rank.TRIBE())){
3123
                myname.setTribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:tribe"),newName, Rank.TRIBE(),rank));
3124
            }
3125
            if(atomisedMap.get("dwcranks:subtribe") != null && checkRankValidForImport(Rank.SUBTRIBE())){
3126
                myname.setSubtribe(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subtribe"),newName, Rank.SUBTRIBE(),rank));
3127
            }
3128
            if(atomisedMap.get("dwc:genus") != null && checkRankValidForImport(Rank.GENUS())){
3129
                myname.setGenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:genus"),newName, Rank.GENUS(),rank));
3130
            }
3131
            if(atomisedMap.get("dwcranks:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3132
                myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:subgenus"),newName, Rank.SUBGENUS(),rank));
3133
            }
3134
            if(atomisedMap.get("dwc:subgenus") != null && checkRankValidForImport(Rank.SUBGENUS())){
3135
                myname.setSubgenus(myname.findOrCreateTaxon(atomisedMap.get("dwc:subgenus"),newName, Rank.SUBGENUS(),rank));
3136
            }
3137
            if(atomisedMap.get("dwc:species") != null && checkRankValidForImport(Rank.SPECIES())){
3138
                String n=newName;
3139
                if(atomisedMap.get("dwc:infraspecificepithet") != null) {
3140
                    n=newName.split(atomisedMap.get("dwc:infraspecificepithet"))[0];
3141
                    n=n.replace("subsp.","");
3142
                }
3143
                if(atomisedMap.get("dwc:subspecies") != null) {
3144
                    n=newName.split(atomisedMap.get("dwc:subspecies"))[0];
3145
                    n=n.replace("subsp.","");
3146
                }
3147
                if(atomisedMap.get("dwcranks:varietyepithet") != null) {
3148
                    n=newName.split(atomisedMap.get("dwcranks:varietyepithet"))[0];
3149
                    n=n.replace("var.","");
3150
                    n=n.replace("v.","");
3151
                }
3152
                if(atomisedMap.get("dwcranks:formepithet") != null) {
3153
                    //TODO
3154
                    //System.out.println("TODO FORMA");
3155
                    n=newName.split(atomisedMap.get("dwcranks:formepithet"))[0];
3156
                    n=n.replace("forma","");
3157
                }
3158
                n=n.trim();
3159
                String author = myname.getAuthor();
3160
                if(n.split(" ").length>2){
3161
                    String n2=n.split(" ")[0]+" "+n.split(" ")[1];
3162
                    String a="";
3163
                    try{
3164
                        a= n.split(n2)[1].trim();
3165
                    }catch(Exception e){logger.info("no author  in "+n);}
3166
                    myname.setAuthor(a);
3167
                    //System.out.println("FINDCREATESPECIES --"+n2+"--"+n+"**"+a+"##");
3168
                    n=n2;
3169

    
3170
                }
3171

    
3172
                myname.setSpecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:species"),n, Rank.SPECIES(),rank));
3173
                myname.setAuthor(author);
3174
            }
3175
            if(atomisedMap.get("dwc:subspecies") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3176
                myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:subspecies"), newName,Rank.SUBSPECIES(),rank));
3177
            }
3178
            if(atomisedMap.get("dwc:infraspecificepithet") != null && checkRankValidForImport(Rank.SUBSPECIES())){
3179
                myname.setSubspecies(myname.findOrCreateTaxon(atomisedMap.get("dwc:infraspecificepithet"),newName, Rank.SUBSPECIES(),rank));
3180
            }
3181
            if(atomisedMap.get("dwcranks:varietyepithet") != null && checkRankValidForImport(Rank.VARIETY())){
3182
                myname.setVariety(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:varietyepithet"),newName, Rank.VARIETY(),rank));
3183
            }
3184
            if(atomisedMap.get("dwcranks:formepithet") != null && checkRankValidForImport(Rank.FORM())){
3185
                myname.setForm(myname.findOrCreateTaxon(atomisedMap.get("dwcranks:formepithet"), newName,Rank.FORM(),rank));
3186
            }
3187
        }
3188
    }
3189

    
3190
    /**
3191
     * @return
3192
     */
3193
    private boolean checkRankValidForImport(Rank currentRank) {
3194
        //logger.info("checkRankValidForImport");
3195
        return currentRank.isLower(state2.getConfig().getMaxRank()) || currentRank.equals(state2.getConfig().getMaxRank());
3196
    }
3197

    
3198

    
3199

    
3200
    /**
3201
     * @param classification2
3202
     */
3203
    public void updateClassification(Classification classification2) {
3204
        //logger.info("updateClassification");
3205
        classification = classification2;
3206
    }
3207

    
3208

    
3209

    
3210
    public class MyName {
3211
        /**
         * Creates a name holder with all name parts unset.
         *
         * @param isSynonym initial value of the synonym flag
         */
        public MyName(boolean isSynonym) {
            this.isSynonym = isSynonym;
        }
3218

    
3219
        // plain string data of the name
        String originalName = "";
        String newName = "";
        Rank rank = Rank.UNKNOWN_RANK();
        String identifier = "";
        String status = "";
        String author = null;

        // the name object, set by setParsedName(..) and setNotParsableTaxon(..)
        TaxonNameBase<?,?> taxonNameBase;

        // the reference, set by setSource(..)
        Reference refMods;

        // one slot per rank: the taxon, its name and its name string
        Taxon family, subfamily, tribe, subtribe, genus, subgenus, species, subspecies, variety, form;
        INonViralName familyName, subfamilyName, tribeName, subtribeName, genusName, subgenusName, speciesName, subspeciesName;
        String familyStr, subfamilyStr, tribeStr, subtribeStr, genusStr, subgenusStr, speciesStr, subspeciesStr, formStr, varietyStr;
        Integer publicationYear;

        Taxon higherTaxa;
        Rank higherRank;

        // results of setNotParsableTaxon() / buildTaxon()
        private Taxon taxon;
        private Synonym syno;
3240

    
3241
        /**
3242
         * @return the syno
3243
         */
3244
        public Synonym getSyno() {
3245
            return syno;
3246
        }
3247

    
3248
        @Override
3249
        public String toString(){
3250
            List<String> tot=new ArrayList<String>();
3251
            String[] n= {familyStr, subfamilyStr, tribeStr,subtribeStr,genusStr,subgenusStr,speciesStr,subspeciesStr,formStr,varietyStr};
3252
            for (String elt:n){
3253
                if (!StringUtils.isEmpty(elt)) {
3254
                    tot.add(elt);
3255
                } else {
3256
                    tot.add("*");
3257
                }
3258
            }
3259
            return StringUtils.join(tot," ");
3260
        }
3261
        /**
3262
         * @param syno the syno to set
3263
         */
3264
        public void setSyno(Synonym syno) {
3265
            this.syno = syno;
3266
        }
3267

    
3268
        // synonym flag: decides whether a Synonym or a Taxon is looked up or created
        boolean isSynonym = false;
3269

    
3270
        /**
3271
         * @return the isSynonym
3272
         */
3273
        public boolean isSynonym() {
3274
            return isSynonym;
3275
        }
3276

    
3277
        /**
3278
         * @param isSynonym the isSynonym to set
3279
         */
3280
        public void setSynonym(boolean isSynonym) {
3281
            this.isSynonym = isSynonym;
3282
        }
3283

    
3284
        public void setSource(Reference re){
3285
            refMods=re;
3286
        }
3287

    
3288
        /**
3289
         * @param string
3290
         */
3291
        public void setFormStr(String string) {
3292
            this.formStr=string;
3293

    
3294
        }
3295
        /**
3296
         * @param string
3297
         */
3298
        public void setVarietyStr(String string) {
3299
            this.varietyStr=string;
3300

    
3301
        }
3302
        /**
3303
         * @param string
3304
         */
3305
        public void setSubspeciesStr(String string) {
3306
            this.subspeciesStr=string;
3307

    
3308
        }
3309
        /**
3310
         * @param string
3311
         */
3312
        public void setSpeciesStr(String string) {
3313
            this.speciesStr=string;
3314

    
3315
        }
3316
        /**
3317
         * @param string
3318
         */
3319
        public void setSubgenusStr(String string) {
3320
            this.subgenusStr=string;
3321

    
3322
        }
3323
        /**
3324
         * @param string
3325
         */
3326
        public void setGenusStr(String string) {
3327
            this.genusStr=string;
3328

    
3329
        }
3330
        /**
3331
         * @param string
3332
         */
3333
        public void setSubtribeStr(String string) {
3334
            this.subtribeStr=string;
3335

    
3336
        }
3337
        /**
3338
         * @param string
3339
         */
3340
        public void setTribeStr(String string) {
3341
            this.tribeStr=string;
3342

    
3343
        }
3344
        /**
3345
         * @param string
3346
         */
3347
        public void setSubfamilyStr(String string) {
3348
            this.subfamilyStr=string;
3349

    
3350
        }
3351
        /**
3352
         * @param string
3353
         */
3354
        public void setFamilyStr(String string) {
3355
            this.familyStr=string;
3356

    
3357
        }
3358
        /**
3359
         * @return the familyStr
3360
         */
3361
        public String getFamilyStr() {
3362
            return familyStr;
3363
        }
3364
        /**
3365
         * @return the subfamilyStr
3366
         */
3367
        public String getSubfamilyStr() {
3368
            return subfamilyStr;
3369
        }
3370
        /**
3371
         * @return the tribeStr
3372
         */
3373
        public String getTribeStr() {
3374
            return tribeStr;
3375
        }
3376
        /**
3377
         * @return the subtribeStr
3378
         */
3379
        public String getSubtribeStr() {
3380
            return subtribeStr;
3381
        }
3382
        /**
3383
         * @return the genusStr
3384
         */
3385
        public String getGenusStr() {
3386
            return genusStr;
3387
        }
3388
        /**
3389
         * @return the subgenusStr
3390
         */
3391
        public String getSubgenusStr() {
3392
            return subgenusStr;
3393
        }
3394
        /**
3395
         * @return the speciesStr
3396
         */
3397
        public String getSpeciesStr() {
3398
            return speciesStr;
3399
        }
3400
        /**
3401
         * @return the subspeciesStr
3402
         */
3403
        public String getSubspeciesStr() {
3404
            return subspeciesStr;
3405
        }
3406
        /**
3407
         * @return the formStr
3408
         */
3409
        public String getFormStr() {
3410
            return formStr;
3411
        }
3412
        /**
3413
         * @return the varietyStr
3414
         */
3415
        public String getVarietyStr() {
3416
            return varietyStr;
3417
        }
3418

    
3419
        public Integer getPublicationYear() {
3420
			return publicationYear;
3421
		}
3422

    
3423
		public void setPublicationYear(Integer publicationYear) {
3424
			this.publicationYear = publicationYear;
3425
		}
3426

    
3427
        /**
3428
         * @param newName2
3429
         */
3430
        public void setNotParsableTaxon(String newName2) {
3431
            //takes too much time
3432
            //            List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3433

    
3434
            NomenclaturalStatusType statusType = null;
3435
            if (!getStatus().isEmpty()){
3436
                try {
3437
                    statusType = nomStatusString2NomStatus(getStatus());
3438
                } catch (UnknownCdmTypeException e) {
3439
                    addProblematicStatusToFile(getStatus());
3440
                    logger.warn("Problem with status");
3441
                }
3442
            }
3443
            List<TaxonBase> tmpList = new ArrayList<>();
3444

    
3445
            Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, newName2, MatchMode.BEGINNING, null, null, null, null, null);
3446
            tmpList.addAll(taxontest.getRecords());
3447

    
3448
            //logger.info("tmpList returned: "+tmpList.size());
3449

    
3450

    
3451
            INonViralName identicName = null;
3452
            boolean foundIdentic=false;
3453
            TaxonBase<?> tmpTaxonBase=null;
3454
            //            Taxon tmpPartial=null;
3455
            for (TaxonBase<?> tmpb:tmpList){
3456
                if(tmpb !=null){
3457
                    TaxonNameBase<?,?> tnb =  tmpb.getName();
3458
                    Rank crank=null;
3459
                    if (tnb != null){
3460
                        if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(newName2) ){
3461
                            crank =tnb.getRank();
3462
                            if (crank !=null && rank !=null){
3463
                                if (crank.equals(rank)){
3464
                                	identicName = tnb;
3465
                                	if (isSynonym && tmpb.isInstanceOf(Synonym.class) || !isSynonym && tmpb.isInstanceOf(Taxon.class)){
3466
                                		foundIdentic=true;
3467
                                		tmpTaxonBase=tmpb;
3468
                               			break;
3469
                                	}
3470
                                }
3471
                            }
3472
                        }
3473
                    }
3474
                }
3475
            }
3476
            boolean statusMatch=false;
3477
            boolean appendedMatch=false;
3478
            if(tmpTaxonBase !=null && foundIdentic){
3479
                statusMatch=compareStatus(tmpTaxonBase, statusType);
3480
                if (!getStatus().isEmpty() && ! (tmpTaxonBase.getAppendedPhrase() == null)) {
3481
                    appendedMatch=tmpTaxonBase.getAppendedPhrase().equals(getStatus());
3482
                }
3483
                if (getStatus().isEmpty() && tmpTaxonBase.getAppendedPhrase() == null) {
3484
                    appendedMatch=true;
3485
                }
3486

    
3487
            }
3488
            if ((tmpTaxonBase == null || !foundIdentic) ||  (tmpTaxonBase != null && !statusMatch) ||  (tmpTaxonBase != null && !appendedMatch && !statusMatch)){
3489

    
3490
            	INonViralName tnb;
3491
            	if (identicName == null){
3492
            		tnb = getNonViralNameAccNomenclature();
3493
            		tnb.setRank(rank);
3494

    
3495
	                if(statusType != null) {
3496
	                    tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3497
	                }
3498
	                if(StringUtils.isNotBlank(getStatus())) {
3499
	                    tnb.setAppendedPhrase(getStatus());
3500
	                }
3501
	                tnb.setTitleCache(newName2,true);
3502
	                tmpTaxonBase = findMatchingTaxon(tnb,refMods);
3503
	            }else{
3504
            		tnb = identicName;
3505
            	}
3506

    
3507
                if(tmpTaxonBase==null){
3508
                    tmpTaxonBase = isSynonym ? Synonym.NewInstance(tnb, refMods) : Taxon.NewInstance(tnb, refMods);
3509
                    if(!state2.getConfig().doKeepOriginalSecundum()) {
3510
                        tmpTaxonBase.setSec(state2.getConfig().getSecundum());
3511
                    }
3512
                    //tmptaxonbase.setSec(refMods);
3513
                    if(!isSynonym) {
3514
                        classification.addChildTaxon((Taxon)tmpTaxonBase, null, null);
3515
                        sourceHandler.addSource(refMods, (Taxon)tmpTaxonBase);
3516
                    }
3517
                }
3518
            }
3519

    
3520
            tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3521
            if (author != null) {
3522
                if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
3523
                    setLSID(getIdentifier(), tmpTaxonBase);
3524
                    importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3525
                    tmpTaxonBase = CdmBase.deproxy(tmpTaxonBase, TaxonBase.class);
3526
                }
3527
            }
3528
            TaxonNameBase<?,?> tnb = CdmBase.deproxy(tmpTaxonBase.getName(), TaxonNameBase.class);
3529

    
3530
            if(!isSynonym) {
3531
                this.taxon=(Taxon)tmpTaxonBase;
3532
            } else {
3533
                if (tmpTaxonBase instanceof Taxon){
3534
                	logger.warn("Incorrect status");
3535
                }
3536
            	this.syno=(Synonym)tmpTaxonBase;
3537
            }
3538

    
3539
            taxonNameBase = tnb;
3540

    
3541
        }
3542

    
3543
        /**
3544
         *
3545
         */
3546
        public void buildTaxon() {
3547
            //System.out.println("BUILD TAXON");
3548
            logger.info("buildTaxon");
3549
            NomenclaturalStatusType statusType = null;
3550
            if (!getStatus().isEmpty()){
3551
            	status = getStatus();
3552
            	String newNameStatus = newNameStatus(status);
3553
            	if (newNameStatus != null){
3554
            		taxonNameBase.setAppendedPhrase(newNameStatus);
3555
            	}else{
3556
            		try {
3557
            			statusType = nomStatusString2NomStatus(getStatus());
3558
            			taxonNameBase.addStatus(NomenclaturalStatus.NewInstance(statusType));
3559
            		} catch (UnknownCdmTypeException e) {
3560
            			addProblematicStatusToFile(getStatus());
3561
            			logger.warn("Problem with status");
3562
            		}
3563
            	}
3564
            }
3565
            importer.getNameService().save(taxonNameBase);
3566

    
3567
            TaxonBase<?> tmpTaxonBase;
3568
            if (!isSynonym) {
3569
                tmpTaxonBase =Taxon.NewInstance(taxonNameBase, refMods); //sec set null
3570
            }
3571
            else {
3572
                tmpTaxonBase =Synonym.NewInstance(taxonNameBase, refMods); //sec set null
3573
            }
3574
            boolean exist = false;
3575
            if (!isSynonym){
3576
	            for (TaxonNode node : classification.getAllNodes()){
3577
	                try{
3578
	                	Taxon nodeTaxon = node.getTaxon();
3579
	                	boolean titleMatches = nodeTaxon.getTitleCache().equalsIgnoreCase(tmpTaxonBase.getTitleCache());
3580
	                	boolean nomStatusMatches = compareStatus(node.getTaxon(), statusType);
3581
	                	boolean nodeNameReplaceable = checkNodeNameReplaceable(nodeTaxon, tmpTaxonBase);
3582
	                    if(titleMatches && nomStatusMatches) {
3583
	                    	if (!isSynonym) {
3584
	                    		tmpTaxonBase=CdmBase.deproxy(nodeTaxon, TaxonBase.class);
3585
	                            exist =true;
3586
	                        } else {
3587
	                            logger.info("Found the same name but from another type (taxon/synonym)");
3588
	                            TaxonNameBase<?,?> existingTnb = getTaxon().getName();
3589
                                tmpTaxonBase = Synonym.NewInstance(existingTnb, refMods);
3590
                                importer.getTaxonService().saveOrUpdate(tmpTaxonBase);
3591
                                exist =true;
3592
                            }
3593
	                    }else if (nodeNameReplaceable){
3594
	                    	nodeTaxon.setName(tmpTaxonBase.getName());
3595
	                    	tmpTaxonBase = nodeTaxon;
3596
	                    	exist = true;
3597
	                    }
3598
	                }catch(NullPointerException n){logger.warn(" A taxon is either null or its titlecache is null - ignore it?");}
3599
	            }
3600
            }
3601
            if (!exist){
3602

    
3603
                boolean insertAsExisting =false;
3604
                List<Taxon> existingTaxons=new ArrayList<Taxon>();
3605
                try {
3606
                    existingTaxons = getMatchingTaxa(taxonNameBase);
3607
                } catch (Exception e1) {
3608
                    e1.printStackTrace();
3609
                }
3610
                double similarityScore=0.0;
3611
                double similarityAuthor=-1;
3612
                String author1="";
3613
                String author2="";
3614
                String t1="";
3615
                String t2="";
3616
                for (Taxon bestMatchingTaxon : existingTaxons){
3617
                    //System.out.println("tnbase "+taxonnamebase.getTitleCache());
3618
                    //System.out.println("bestex "+bestMatchingTaxon.getTitleCache());
3619
                    if(taxonNameBase.getAuthorshipCache()!=null) {
3620
                    	author1=taxonNameBase.getAuthorshipCache();
3621
                    }
3622
                    try {
3623
                        if(bestMatchingTaxon.getName().getAuthorshipCache()!=null) {
3624
                            author2=bestMatchingTaxon.getName().getAuthorshipCache();
3625
                        }
3626
                    } catch (Exception e) {
3627
                        // TODO Auto-generated catch block
3628
                        e.printStackTrace();
3629
                    }
3630
                    try {
3631
                        t1=taxonNameBase.getTitleCache();
3632
                        if (author1!=null && !StringUtils.isEmpty(author1)) {
3633
                            t1=t1.split(Pattern.quote(author1))[0];
3634
                        }
3635
                    } catch (Exception e) {
3636
                        // TODO Auto-generated catch block
3637
                        e.printStackTrace();
3638
                    }
3639
                    try {
3640
                        t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
3641
                        if (author2!=null && !StringUtils.isEmpty(author2)) {
3642
                            t2=t2.split(Pattern.quote(author2))[0];
3643
                        }
3644
                    } catch (Exception e) {
3645
                        // TODO Auto-generated catch block
3646
                        e.printStackTrace();
3647
                    }
3648

    
3649
                    similarityScore=similarity(t1.trim(), t2.trim());
3650
                    //System.out.println("taxonscore "+similarityScore);
3651
                    similarityAuthor=similarity(author1.trim(), author2.trim());
3652
                    //System.out.println("authorscore "+similarityAuthor);
3653
                    insertAsExisting = compareAndCheckTaxon(taxonNameBase, refMods, similarityScore, bestMatchingTaxon, similarityAuthor);
3654
                    if(insertAsExisting) {
3655
                        tmpTaxonBase=bestMatchingTaxon;
3656
                        break;
3657
                    }
3658
                }
3659
                if ( !insertAsExisting ){
3660
                    if(!state2.getConfig().doKeepOriginalSecundum()) {
3661
                        tmpTaxonBase.setSec(state2.getConfig().getSecundum());
3662
                    }
3663

    
3664
                    //                    tmptaxonbase.setSec(refMods);
3665
                    if (taxonNameBase.getRank().equals(state2.getConfig().getMaxRank())) {
3666
                        //System.out.println("****************************"+tmptaxonbase);
3667
                        if (!isSynonym) {
3668
                            classification.addChildTaxon((Taxon)tmpTaxonBase, refMods, null);
3669
                        }
3670
                    } else{
3671
                        hierarchy = new HashMap<Rank, Taxon>();
3672
                        //System.out.println("LOOK FOR PARENT "+taxonnamebase.toString()+", "+tmptaxonbase.toString());
3673
                        if (!isSynonym){
3674
                            lookForParentNode(taxonNameBase,(Taxon)tmpTaxonBase, refMods,this);
3675
                            //System.out.println("HIERARCHY "+hierarchy);
3676
                            Taxon parent = buildHierarchy();
3677
                            if(!taxonExistsInClassification(parent,(Taxon)tmpTaxonBase)){
3678
                                if(parent !=null) {
3679
                                    classification.addParentChild(parent, (Taxon)tmpTaxonBase, refMods, null);
3680
                                } else {
3681
                                    classification.addChildTaxon((Taxon)tmpTaxonBase, refMods, null);
3682
                                }
3683
                                importer.getClassificationService().saveOrUpdate(classification);
3684
                            }
3685
                        }
3686
                        //                        Set<TaxonNode> nodeList = classification.getAllNodes();
3687
                        //                        for(TaxonNode tn:nodeList) {
3688
                        //                            System.out.println(tn.getTaxon());
3689
                        //                        }
3690
                    }
3691
                }
3692
                importer.getClassificationService().saveOrUpdate(classification);
3693
                 if(isSynonym) {
3694
                    try{
3695
                        Synonym castTest=CdmBase.deproxy(tmpTaxonBase, Synonym.class);
3696
                    }catch(Exception e){
3697
                        TaxonNameBase<?,?> existingTnb = tmpTaxonBase.getName();
3698
                        Synonym castTest = Synonym.NewInstance(existingTnb, refMods);
3699
                        importer.getTaxonService().saveOrUpdate(castTest);
3700
                        tmpTaxonBase=CdmBase.deproxy(castTest, Synonym.class);
3701
                    }
3702
                }
3703
            }
3704
            if(!isSynonym) {
3705
                taxon=CdmBase.deproxy(tmpTaxonBase, Taxon.class);
3706
            } else {
3707
                syno=CdmBase.deproxy(tmpTaxonBase, Synonym.class);
3708
            }
3709

    
3710
        }
3711

    
3712
		private boolean checkNodeNameReplaceable(Taxon nodeTaxon, TaxonBase<?> newTaxon) {
3713
			//TODO preliminary check
3714
			if (newTaxon.isInstanceOf(Synonym.class)){
3715
				return false;
3716
			}
3717
			INonViralName nodeName = nodeTaxon.getName();
3718
			INonViralName newName = newTaxon.getName();
3719
			if (nodeTaxon.getName() == null ||  newName == null){
3720
				return false;
3721
			}
3722
			if (nodeTaxon.getDescriptions().size() > 0 || nodeName.getDescriptions().size() > 0 || nodeName.getTypeDesignations().size() > 0 ){
3723
				return false;
3724
			}
3725
			boolean compare = true;
3726
			for (NomenclaturalStatus status : newName.getStatus() ){
3727
				compare &= compareStatus(nodeTaxon, status.getType());
3728
			}
3729
			if (! compare){
3730
				return false;
3731
			}
3732

    
3733
			if (nodeName.getNameCache() != null && nodeName.getNameCache().equals(newName.getNameCache())){
3734
				if (nodeName.getNameCache().equals(nodeName.getTitleCache())){
3735
					if (newName.getNameCache().length() < newName.getTitleCache().length()){
3736
						logger.warn("We still need to check, if node was automatically created via hierarchy creation: " + nodeName.getNameCache());
3737
						return true;
3738
					}
3739
				}
3740
			}
3741

    
3742
			return false;
3743
		}
3744

    
3745
		/**
3746
         *
3747
         */
3748
        private Taxon buildHierarchy() {
3749
            logger.info("buildHierarchy");
3750
            Taxon higherTaxon = null;
3751
            //add the maxRank as a root
3752
            if(hierarchy.containsKey(state2.getConfig().getMaxRank())){
3753
                Taxon ct=hierarchy.get(state2.getConfig().getMaxRank());
3754
                if(!taxonExistsInClassification(higherTaxon, ct)) {
3755
                   classification.addChildTaxon(ct, refMods, null);
3756
                }
3757
                higherTaxon = hierarchy.get(state2.getConfig().getMaxRank());
3758
                //                return higherTaxon;
3759
            }
3760
            //add the relation to the highertaxon, except if the current rank to add IS the maxRank
3761

    
3762
            //TODO higher Ranks
3763

    
3764
            if(hierarchy.containsKey(Rank.FAMILY()) && !state2.getConfig().getMaxRank().equals(Rank.FAMILY())){
3765
                higherTaxon=saveAndGetHigherTaxon(Rank.FAMILY(),higherTaxon);
3766
            }
3767
            if(hierarchy.containsKey(Rank.SUBFAMILY()) && !state2.getConfig().getMaxRank().equals(Rank.SUBFAMILY())){
3768
                higherTaxon=saveAndGetHigherTaxon(Rank.SUBFAMILY(),higherTaxon);
3769
            }
3770
            if(hierarchy.containsKey(Rank.TRIBE())&& !state2.getConfig().getMaxRank().equals(Rank.TRIBE())){
3771
                higherTaxon=saveAndGetHigherTaxon(Rank.TRIBE(),higherTaxon);
3772
            }
3773
            if(hierarchy.containsKey(Rank.SUBTRIBE())&& !state2.getConfig().getMaxRank().equals(Rank.SUBTRIBE())){
3774
                higherTaxon=saveAndGetHigherTaxon(Rank.SUBTRIBE(),higherTaxon);
3775
            }
3776
            if(hierarchy.containsKey(Rank.GENUS())&& !state2.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3777
                higherTaxon=saveAndGetHigherTaxon(Rank.GENUS(),higherTaxon);
3778
            }
3779
            if(hierarchy.containsKey(Rank.SUBGENUS())&& !state2.getConfig().getMaxRank().equals(Rank.SUBGENUS())){
3780
                higherTaxon=saveAndGetHigherTaxon(Rank.SUBGENUS(),higherTaxon);
3781
            }
3782
            importer.getClassificationService().saveOrUpdate(classification);
3783
            return higherTaxon;
3784
        }
3785

    
3786
        private Taxon saveAndGetHigherTaxon(Rank r, Taxon higherTaxon){
3787
            Taxon ct=hierarchy.get(r);
3788
            if(!taxonExistsInClassification(higherTaxon,ct )) {
3789
                if(higherTaxon != null && ct!=null) {
3790
                    classification.addParentChild(higherTaxon, ct, refMods, null);
3791
                } else
3792
                    if(higherTaxon == null && ct !=null) {
3793
                        classification.addChildTaxon(ct, refMods, null);
3794
                }
3795
            }
3796
            return ct;
3797
        }
3798

    
3799
        private boolean taxonExistsInClassification(Taxon parent, Taxon child){
3800
            logger.info("taxonExistsInClassification");
3801
            //            System.out.println("LOOK IF TAXA EXIST "+parent+", "+child);
3802
            boolean found=false;
3803
            if(parent !=null){
3804
                for (TaxonNode p : classification.getAllNodes()){
3805
                    if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
3806
                        for (TaxonNode c : p.getChildNodes()) {
3807
                            if (c.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3808
                                found=true;
3809
                                break;
3810
                            }
3811
                        }
3812
                    }
3813
                }
3814
            }
3815
            else{
3816
                for (TaxonNode p : classification.getAllNodes()){
3817
                    if(p.getTaxon().getTitleCache().equalsIgnoreCase(child.getTitleCache())) {
3818
                        found=true;
3819
                        break;
3820
                    }
3821
                }
3822
            }
3823
            //            System.out.println("LOOK IF TAXA EXIST? "+found);
3824
            return found;
3825
        }
3826
        /**
3827
         * @param nameToBeFilledTest
3828
         */
3829
        public void setParsedName(ITaxonNameBase nameToBeFilledTest) {
3830
            this.taxonNameBase = TaxonNameBase.castAndDeproxy(nameToBeFilledTest);
3831

    
3832
        }
3833
        //variety dwcranks:varietyEpithet
3834
        /**
3835
         * @return the author
3836
         */
3837
        public String getAuthor() {
3838
            return author;
3839
        }
3840
        /**
3841
         * @return
3842
         */
3843
        public Taxon getTaxon() {
3844
            return taxon;
3845
        }
3846
        /**
3847
         * @return
3848
         */
3849
        public TaxonNameBase<?,?> getTaxonNameBase() {
3850
            return taxonNameBase;
3851
        }
3852

    
3853
        /**
3854
         * @param findOrCreateTaxon
3855
         */
3856
        public void setForm(Taxon form) {
3857
            this.form=form;
3858

    
3859
        }
3860
        /**
3861
         * @param findOrCreateTaxon
3862
         */
3863
        public void setVariety(Taxon variety) {
3864
            this.variety=variety;
3865

    
3866
        }
3867
        /**
3868
         * @param string
3869
         * @return
3870
         */
3871
        @SuppressWarnings("rawtypes")
3872
        public Taxon findOrCreateTaxon(String partialname,String fullname, Rank rank, Rank globalrank) {
3873
            logger.info("findOrCreateTaxon");
3874
            sourceUrlRef=CdmBase.deproxy(sourceUrlRef, Reference.class);
3875
            //takes too much time
3876
            //            List<TaxonBase> tmpList = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
3877
            //            logger.info("tmpList returned: "+tmpList.size());
3878

    
3879
            NomenclaturalStatusType statusType = null;
3880
            if (!getStatus().isEmpty()){
3881
                try {
3882
                    statusType = nomStatusString2NomStatus(getStatus());
3883
                } catch (UnknownCdmTypeException e) {
3884
                    addProblematicStatusToFile(getStatus());
3885
                    logger.warn("Problem with status");
3886
                }
3887
            }
3888

    
3889
            List<TaxonBase> tmpListFiltered = new ArrayList<TaxonBase>();
3890

    
3891
            Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, fullname, MatchMode.BEGINNING, null, null, null, null, null);
3892

    
3893
            tmpListFiltered.addAll(taxontest.getRecords());
3894
            taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, partialname, MatchMode.BEGINNING, null, null, null, null, null);
3895
            tmpListFiltered.addAll(taxontest.getRecords());
3896

    
3897
            //logger.info("tmpListFiltered returned: "+tmpListFiltered.size());
3898

    
3899
            boolean nameCorrected=false;
3900
            if (fullname.indexOf(partialname)<0) {
3901
                nameCorrected=true;
3902
            }
3903

    
3904
            boolean foundIdentic=false;
3905
            Taxon tmp=null;
3906
            for (TaxonBase tmpb:tmpListFiltered){
3907
                if(tmpb !=null){
3908
                    TaxonNameBase tnb =  tmpb.getName();
3909
                    Rank crank=null;
3910
                    if (tnb != null){
3911
                         if(globalrank.equals(rank) || (globalrank.isLower(Rank.SPECIES()) && rank.equals(Rank.SPECIES()))){
3912
                            if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(fullname) ){
3913
                                crank =tnb.getRank();
3914
                                if (crank !=null && rank !=null){
3915
                                    if (crank.equals(rank)){
3916
                                        foundIdentic=true;
3917
                                        try{
3918
                                            tmp=(Taxon)tmpb;
3919
                                            break;
3920
                                        }catch(Exception e){
3921
                                            e.printStackTrace();
3922
                                        }
3923
                                    }
3924
                                }
3925
                            }
3926
                            if(nameCorrected){ //for corrected names such as Anochetus -- A. blf-pat
3927
                                if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
3928
                                    crank =tnb.getRank();
3929
                                    if (crank !=null && rank !=null){
3930
                                        if (crank.equals(rank)){
3931
                                            foundIdentic=true;
3932
                                            try{
3933
                                                tmp=(Taxon)tmpb;
3934
                                                break;
3935
                                            }catch(Exception e){
3936
                                                e.printStackTrace();
3937
                                            }
3938
                                        }
3939
                                    }
3940
                                }
3941
                            }
3942
                        }
3943
                        else{
3944
                            if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(partialname) ){
3945
                                crank =tnb.getRank();
3946
                                if (crank !=null && rank !=null){
3947
                                    if (crank.equals(rank)){
3948
                                        foundIdentic=true;
3949
                                        try{
3950
                                            tmp=(Taxon)tmpb;
3951
                                            break;
3952
                                        }catch(Exception e){
3953
                                            e.printStackTrace();
3954
                                        }
3955
                                    }
3956
                                }
3957
                            }
3958
                        }
3959
                    }
3960
                }
3961
            }
3962
            boolean statusMatch=false;
3963
            boolean appendedMatch=false;
3964
            if(tmp !=null && foundIdentic){
3965
                statusMatch=compareStatus(tmp, statusType);
3966
                if (!getStatus().isEmpty() && ! (tmp.getAppendedPhrase() == null)) {
3967
                    appendedMatch=tmp.getAppendedPhrase().equals(getStatus());
3968
                }
3969
                if (getStatus().isEmpty() && tmp.getAppendedPhrase() == null) {
3970
                    appendedMatch=true;
3971
                }
3972

    
3973
            }
3974
            if ((tmp == null || !foundIdentic) ||  (tmp != null && !statusMatch) ||  (tmp != null && !appendedMatch && !statusMatch)){
3975

    
3976
                INonViralName tnb = getNonViralNameAccNomenclature();
3977
                tnb.setRank(rank);
3978

    
3979
                if(statusType != null) {
3980
                    tnb.addStatus(NomenclaturalStatus.NewInstance(statusType));
3981
                }
3982
                if(StringUtils.isNotBlank(getStatus())) {
3983
                    tnb.setAppendedPhrase(getStatus());
3984
                }
3985

    
3986
                if(rank.equals(Rank.UNKNOWN_RANK())){
3987
                    tnb.setTitleCache(fullname, true);
3988
                    //                    tnb.setGenusOrUninomial(fullname);
3989
                }
3990
                if(rank.isHigher(Rank.GENUS())) {
3991
                    tnb.setGenusOrUninomial(partialname);
3992
                }
3993

    
3994
                if(rank.isHigher(Rank.SPECIES())) {
3995
                    tnb.setTitleCache(partialname, true);
3996
                }
3997

    
3998
                if (rank.equals(globalrank) && author != null) {
3999

    
4000
                    tnb.setCombinationAuthorship(findOrCreateAuthor(author));
4001
                    if (getIdentifier() !=null && !getIdentifier().isEmpty()){
4002
                        Taxon taxonLSID = getTaxonByLSID(getIdentifier());
4003
                        if (taxonLSID !=null) {
4004
                            tmp=taxonLSID;
4005
                        }
4006
                    }
4007
                }
4008

    
4009
                if(tmp == null){
4010
                    if (rank.equals(Rank.FAMILY())) {
4011
                        tmp = buildFamily(tnb);
4012
                    }
4013
                    if (rank.equals(Rank.SUBFAMILY())) {
4014
                        tmp = buildSubfamily(tnb);
4015
                    }
4016
                    if (rank.equals(Rank.TRIBE())) {
4017
                        tmp = buildTribe(tnb);
4018
                    }
4019
                    if (rank.equals(Rank.SUBTRIBE())) {
4020
                        tmp = buildSubtribe(tnb);
4021
                    }
4022
                    if (rank.equals(Rank.GENUS())) {
4023
                        tmp = buildGenus(partialname, tnb);
4024
                    }
4025

    
4026
                    if (rank.equals(Rank.SUBGENUS())) {
4027
                        tmp = buildSubgenus(partialname, tnb);
4028
                    }
4029
                    if (rank.equals(Rank.SPECIES())) {
4030
                        tmp = buildSpecies(partialname, tnb);
4031
                    }
4032

    
4033
                    if (rank.equals(Rank.SUBSPECIES())) {
4034
                        tmp = buildSubspecies(partialname, tnb);
4035
                    }
4036

    
4037
                    if (rank.equals(Rank.VARIETY())) {
4038
                        tmp = buildVariety(fullname, partialname, tnb);
4039
                    }
4040

    
4041
                    if (rank.equals(Rank.FORM())) {
4042
                        tmp = buildForm(fullname, partialname, tnb);
4043
                    }
4044
                    if (tmp != null){
4045
                    	TaxonXTreatmentExtractor.this.sourceHandler.addSource(refMods, tmp);
4046
                    }
4047

    
4048
                    importer.getClassificationService().saveOrUpdate(classification);
4049
                }
4050

    
4051
            }
4052

    
4053
            tmp = CdmBase.deproxy(tmp, Taxon.class);
4054
            if (rank.equals(globalrank) && author != null) {
4055
                if (!getIdentifier().isEmpty() && (getIdentifier().length()>2)){
4056
                    setLSID(getIdentifier(), tmp);
4057
                    importer.getTaxonService().saveOrUpdate(tmp);
4058
                    tmp = CdmBase.deproxy(tmp, Taxon.class);
4059
                }
4060
            }
4061

    
4062
            this.taxon=tmp;
4063

    
4064
            return tmp;
4065
        }
4066

    
4067
        /**
4068
         * @param tnb
4069
         * @return
4070
         */
4071
        private Taxon buildSubfamily(INonViralName tnb) {
4072
            Taxon tmp;
4073
            //            tnb.generateTitle();
4074
            tmp = findMatchingTaxon(tnb,refMods);
4075
            if(tmp ==null){
4076
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4077
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4078
                    tmp.setSec(state2.getConfig().getSecundum());
4079
                }
4080
                //                tmp.setSec(refMods);
4081
                //                sourceHandler.addSource(refMods, tmp);
4082
                if(family != null) {
4083
                    classification.addParentChild(family, tmp, null, null);
4084
                    higherRank=Rank.FAMILY();
4085
                    higherTaxa=family;
4086
                } else {
4087
                    //System.out.println("ADDCHILDTAXON SUBFAMILY "+tmp);
4088
                    classification.addChildTaxon(tmp, null, null);
4089
                }
4090
            }
4091
            return tmp;
4092
        }
4093
        /**
4094
         * @param tnb
4095
         * @return
4096
         */
4097
        private Taxon buildFamily(INonViralName tnb) {
4098
            Taxon tmp;
4099
            //            tnb.generateTitle();
4100
            tmp = findMatchingTaxon(tnb,refMods);
4101
            if(tmp ==null){
4102
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4103
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4104
                    tmp.setSec(state2.getConfig().getSecundum());
4105
                }
4106
                //                tmp.setSec(refMods);
4107
                //sourceHandler.addSource(refMods, tmp);
4108
                //System.out.println("ADDCHILDTAXON FAMILY "+tmp);
4109
                classification.addChildTaxon(tmp, null, null);
4110
            }
4111
            return tmp;
4112
        }
4113
        /**
4114
         * @param fullname
4115
         * @param tnb
4116
         * @return
4117
         */
4118
        private Taxon buildForm(String fullname, String partialname, INonViralName tnb) {
4119
            if (genusName !=null) {
4120
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4121
            }
4122
            if (subgenusName !=null) {
4123
                tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4124
            }
4125
            if(speciesName !=null) {
4126
                tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4127
            }
4128
            if(subspeciesName != null) {
4129
                tnb.setInfraSpecificEpithet(subspeciesName.getInfraSpecificEpithet());
4130
            }
4131
            if(partialname!= null) {
4132
                tnb.setInfraSpecificEpithet(partialname);
4133
            }
4134
             //TODO how to save form??
4135
            tnb.setTitleCache(fullname, true);
4136
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4137
            if(tmp ==null){
4138
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4139
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4140
                    tmp.setSec(state2.getConfig().getSecundum());
4141
                }
4142
                //                tmp.setSec(refMods);
4143
                //sourceHandler.addSource(refMods, tmp);
4144
                if (subspecies !=null) {
4145
                    classification.addParentChild(subspecies, tmp, null, null);
4146
                    higherRank=Rank.SUBSPECIES();
4147
                    higherTaxa=subspecies;
4148
                } else {
4149
                    if (species !=null) {
4150
                        classification.addParentChild(species, tmp, null, null);
4151
                        higherRank=Rank.SPECIES();
4152
                        higherTaxa=species;
4153
                    }
4154
                    else{
4155
                        //                        System.out.println("ADDCHILDTAXON FORM "+tmp);
4156
                        classification.addChildTaxon(tmp, null, null);
4157
                    }
4158
                }
4159
            }
4160
            return tmp;
4161
        }
4162
        /**
4163
         * @param fullname
4164
         * @param tnb
4165
         * @return
4166
         */
4167
        private Taxon buildVariety(String fullname, String partialname, INonViralName tnb) {
4168
            Taxon tmp;
4169
            if (genusName !=null) {
4170
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4171
            }
4172
            if (subgenusName !=null) {
4173
                tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4174
            }
4175
            if(speciesName !=null) {
4176
                tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4177
            }
4178
            if(subspeciesName != null) {
4179
                tnb.setInfraSpecificEpithet(subspeciesName.getSpecificEpithet());
4180
            }
4181
            if(partialname != null) {
4182
                tnb.setInfraSpecificEpithet(partialname);
4183
            }
4184
            //TODO how to save variety?
4185
            tnb.setTitleCache(fullname, true);
4186
            tmp = findMatchingTaxon(tnb,refMods);
4187
            if(tmp ==null){
4188
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4189
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4190
                    tmp.setSec(state2.getConfig().getSecundum());
4191
                }
4192
                //                tmp.setSec(refMods);
4193
                //sourceHandler.addSource(refMods, tmp);
4194
                if (subspecies !=null) {
4195
                    classification.addParentChild(subspecies, tmp, null, null);
4196
                    higherRank=Rank.SUBSPECIES();
4197
                    higherTaxa=subspecies;
4198
                } else {
4199
                    if(species !=null) {
4200
                        classification.addParentChild(species, tmp, null, null);
4201
                        higherRank=Rank.SPECIES();
4202
                        higherTaxa=species;
4203
                    }
4204
                    else{
4205
                        //System.out.println("ADDCHILDTAXON VARIETY "+tmp);
4206
                        classification.addChildTaxon(tmp, null, null);
4207
                    }
4208
                }
4209
            }
4210
            return tmp;
4211
        }
4212
        /**
4213
         * @param partialname
4214
         * @param tnb
4215
         * @return
4216
         */
4217
        private Taxon buildSubspecies(String partialname, INonViralName tnb) {
4218
            if (genusName !=null) {
4219
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4220
            }
4221
            if (subgenusName !=null) {
4222
                //                            System.out.println("SUB:"+subgenusName.getInfraGenericEpithet());
4223
                tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4224
            }
4225
            if(speciesName !=null) {
4226
                //                            System.out.println("SPE:"+speciesName.getSpecificEpithet());
4227
                tnb.setSpecificEpithet(speciesName.getSpecificEpithet());
4228
            }
4229
            tnb.setInfraSpecificEpithet(partialname);
4230
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4231
            if(tmp ==null){
4232
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4233
                if(!state2.getConfig().doKeepOriginalSecundum())
4234
                 {
4235
                    tmp.setSec(state2.getConfig().getSecundum());
4236
                //                tmp.setSec(refMods);
4237
                //sourceHandler.addSource(refMods, tmp);
4238
                }
4239

    
4240
                if(species != null) {
4241
                    classification.addParentChild(species, tmp, null, null);
4242
                    higherRank=Rank.SPECIES();
4243
                    higherTaxa=species;
4244
                }
4245
                else{
4246
                    //System.out.println("ADDCHILDTAXON SUBSPECIES "+tmp);
4247
                    classification.addChildTaxon(tmp, null, null);
4248
                }
4249
            }
4250
            return tmp;
4251
        }
4252
        /**
4253
         * @param partialname
4254
         * @param tnb
4255
         * @return
4256
         */
4257
        private Taxon buildSpecies(String partialname, INonViralName tnb) {
4258
            if (genusName !=null) {
4259
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4260
            }
4261
            if (subgenusName !=null) {
4262
                tnb.setInfraGenericEpithet(subgenusName.getInfraGenericEpithet());
4263
            }
4264
            tnb.setSpecificEpithet(partialname.toLowerCase());
4265
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4266
            if(tmp ==null){
4267
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4268
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4269
                    tmp.setSec(state2.getConfig().getSecundum());
4270
                }
4271
                //                tmp.setSec(refMods);
4272
                //sourceHandler.addSource(refMods, tmp);
4273
                if (subgenus !=null) {
4274
                    classification.addParentChild(subgenus, tmp, null, null);
4275
                    higherRank=Rank.SUBGENUS();
4276
                    higherTaxa=subgenus;
4277
                } else {
4278
                    if (genus !=null) {
4279
                        classification.addParentChild(genus, tmp, null, null);
4280
                        higherRank=Rank.GENUS();
4281
                        higherTaxa=genus;
4282
                    }
4283
                    else{
4284
                        //System.out.println("ADDCHILDTAXON SPECIES "+tmp);
4285
                        classification.addChildTaxon(tmp, null, null);
4286
                    }
4287
                }
4288
            }
4289
            return tmp;
4290
        }
4291
        /**
4292
         * @param partialname
4293
         * @param tnb
4294
         * @return
4295
         */
4296
        private Taxon buildSubgenus(String partialname, INonViralName tnb) {
4297
            tnb.setInfraGenericEpithet(partialname);
4298
            if (genusName !=null) {
4299
                tnb.setGenusOrUninomial(genusName.getGenusOrUninomial());
4300
            }
4301
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4302
            if(tmp ==null){
4303
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4304
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4305
                    tmp.setSec(state2.getConfig().getSecundum());
4306
                }
4307
                //                tmp.setSec(refMods);
4308
                //sourceHandler.addSource(refMods, tmp);
4309
                if(genus != null) {
4310
                    classification.addParentChild(genus, tmp, null, null);
4311
                    higherRank=Rank.GENUS();
4312
                    higherTaxa=genus;
4313
                } else{
4314
                    //System.out.println("ADDCHILDTAXON SUBGENUS "+tmp);
4315
                    classification.addChildTaxon(tmp, null, null);
4316
                }
4317
            }
4318
            return tmp;
4319
        }
4320
        /**
4321
         * @param partialname
4322
         * @param tnb
4323
         * @return
4324
         */
4325
        private Taxon buildGenus(String partialname, INonViralName tnb) {
4326
            Taxon tmp;
4327
            tnb.setGenusOrUninomial(partialname);
4328

    
4329

    
4330
            tmp = findMatchingTaxon(tnb,refMods);
4331
            if(tmp ==null){
4332
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4333
                if(!state2.getConfig().doKeepOriginalSecundum())
4334
                 {
4335
                    tmp.setSec(state2.getConfig().getSecundum());
4336
                //                tmp.setSec(refMods);
4337
                //sourceHandler.addSource(refMods, tmp);
4338
                }
4339

    
4340
                if(subtribe != null) {
4341
                    classification.addParentChild(subtribe, tmp, null, null);
4342
                    higherRank=Rank.SUBTRIBE();
4343
                    higherTaxa=subtribe;
4344
                } else{
4345
                    if(tribe !=null) {
4346
                        classification.addParentChild(tribe, tmp, null, null);
4347
                        higherRank=Rank.TRIBE();
4348
                        higherTaxa=tribe;
4349
                    } else{
4350
                        if(subfamily !=null) {
4351
                            classification.addParentChild(subfamily, tmp, null, null);
4352
                            higherRank=Rank.SUBFAMILY();
4353
                            higherTaxa=subfamily;
4354
                        } else
4355
                            if(family !=null) {
4356
                                classification.addParentChild(family, tmp, null, null);
4357
                                higherRank=Rank.FAMILY();
4358
                                higherTaxa=family;
4359
                            }
4360
                            else{
4361
                                //System.out.println("ADDCHILDTAXON GENUS "+tmp);
4362
                                classification.addChildTaxon(tmp, null, null);
4363
                            }
4364
                    }
4365
                }
4366
            }
4367
            return tmp;
4368
        }
4369

    
4370
        /**
4371
         * @param tnb
4372
         * @return
4373
         */
4374
        private Taxon buildSubtribe(INonViralName tnb) {
4375
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4376
            if(tmp==null){
4377
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4378
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4379
                    tmp.setSec(state2.getConfig().getSecundum());
4380
                }
4381
                //                tmp.setSec(refMods);
4382
                //sourceHandler.addSource(refMods, tmp);
4383
                if(tribe != null) {
4384
                    classification.addParentChild(tribe, tmp, null, null);
4385
                    higherRank=Rank.TRIBE();
4386
                    higherTaxa=tribe;
4387
                } else{
4388
                    //System.out.println("ADDCHILDTAXON SUBTRIBE "+tmp);
4389
                    classification.addChildTaxon(tmp, null, null);
4390
                }
4391
            }
4392
            return tmp;
4393
        }
4394
        /**
4395
         * @param tnb
4396
         * @return
4397
         */
4398
        private Taxon buildTribe(INonViralName tnb) {
4399
            Taxon tmp = findMatchingTaxon(tnb,refMods);
4400
            if(tmp==null){
4401
                tmp = Taxon.NewInstance(tnb, sourceUrlRef);
4402
                if(!state2.getConfig().doKeepOriginalSecundum()) {
4403
                    tmp.setSec(state2.getConfig().getSecundum());
4404
                }
4405
                //                tmp.setSec(refMods);
4406
                //sourceHandler.addSource(refMods, tmp);
4407
                if (subfamily !=null) {
4408
                    classification.addParentChild(subfamily, tmp, null, null);
4409
                    higherRank=Rank.SUBFAMILY();
4410
                    higherTaxa=subfamily;
4411
                } else {
4412
                    if(family != null) {
4413
                        classification.addParentChild(family, tmp, null, null);
4414
                        higherRank=Rank.FAMILY();
4415
                        higherTaxa=family;
4416
                    }
4417
                    else{
4418
                        //System.out.println("ADDCHILDTAXON TRIBE "+tmp);
4419
                        classification.addChildTaxon(tmp, null, null);
4420
                    }
4421
                }
4422
            }
4423
            return tmp;
4424
        }
4425

    
4426
        /**
4427
         * @param identifier2
4428
         * @return
4429
         */
4430
        @SuppressWarnings("rawtypes")
4431
        private Taxon getTaxonByLSID(String identifier) {
4432
            //logger.info("getTaxonByLSID");
4433
            //            boolean lsidok=false;
4434
            String id = identifier.split("__")[0];
4435
            //            String source = identifier.split("__")[1];
4436
            LSID lsid = null;
4437
            if (id.indexOf("lsid")>-1){
4438
                try {
4439
                    lsid = new LSID(id);
4440
                    //                    lsidok=true;
4441
                } catch (MalformedLSIDException e) {
4442
                    logger.warn("Malformed LSID");
4443
                }
4444
            }
4445
            if (lsid !=null){
4446
                List<Taxon> taxa = importer.getTaxonService().list(Taxon.class, 0, 0, null, null);
4447
                LSID currentlsid=null;
4448
                for (Taxon t:taxa){
4449
                    currentlsid = t.getLsid();
4450
                    if (currentlsid !=null){
4451
                        if (currentlsid.getLsid().equals(lsid.getLsid())){
4452
                            try{
4453
                                return t;
4454
                            }
4455
                            catch(Exception e){logger.warn("Exception occurred while comparing LSIDs "+e );}
4456
                        }
4457
                    }
4458
                }
4459
            }
4460
            return null;
4461
        }
4462
        /**
4463
         * @param author2
4464
         * @return
4465
         */
4466
        @SuppressWarnings("rawtypes")
4467
        private Person findOrCreateAuthor(String author2) {
4468
            //logger.info("findOrCreateAuthor");
4469
            List<UuidAndTitleCache<Person>> hiberPersons = importer.getAgentService().getPersonUuidAndTitleCache();
4470
            for (UuidAndTitleCache<Person> hibernateP:hiberPersons){
4471
                if(hibernateP.getTitleCache().equals(author2)) {
4472
                    AgentBase existing = importer.getAgentService().find(hibernateP.getUuid());
4473
                    return CdmBase.deproxy(existing, Person.class);
4474
                }
4475
            }
4476
            Person p = Person.NewInstance();
4477
            p.setTitleCache(author2,true);
4478
            importer.getAgentService().saveOrUpdate(p);
4479
            return CdmBase.deproxy(p, Person.class);
4480
        }
4481
        /**
4482
         * @param author the author to set
4483
         */
4484
        public void setAuthor(String author) {
4485
            this.author = author;
4486
        }
4487

    
4488
        /**
4489
         * @return the higherTaxa
4490
         */
4491
        public Taxon getHigherTaxa() {
4492
            return higherTaxa;
4493
        }
4494
        /**
4495
         * @param higherTaxa the higherTaxa to set
4496
         */
4497
        public void setHigherTaxa(Taxon higherTaxa) {
4498
            this.higherTaxa = higherTaxa;
4499
        }
4500
        /**
4501
         * @return the higherRank
4502
         */
4503
        public Rank getHigherRank() {
4504
            return higherRank;
4505
        }
4506
        /**
4507
         * @param higherRank the higherRank to set
4508
         */
4509
        public void setHigherRank(Rank higherRank) {
4510
            this.higherRank = higherRank;
4511
        }
4512
        public String getName(){
4513
            if (newName.isEmpty()) {
4514
                return originalName;
4515
            } else {
4516
                return newName;
4517
            }
4518

    
4519
        }
4520
        /**
4521
         * @return the fullName
4522
         */
4523
        public String getOriginalName() {
4524
            return originalName;
4525
        }
4526
        /**
4527
         * @param fullName the fullName to set
4528
         */
4529
        public void setOriginalName(String fullName) {
4530
            this.originalName = fullName;
4531
        }
4532
        /**
4533
         * @return the newName
4534
         */
4535
        public String getNewName() {
4536
            return newName;
4537
        }
4538
        /**
4539
         * @param newName the newName to set
4540
         */
4541
        public void setNewName(String newName) {
4542
            this.newName = newName;
4543
        }
4544
        /**
4545
         * @return the rank
4546
         */
4547
        public Rank getRank() {
4548
            return rank;
4549
        }
4550
        /**
4551
         * @param rank the rank to set
4552
         */
4553
        public void setRank(Rank rank) {
4554
            this.rank = rank;
4555
        }
4556
        /**
4557
         * @return the idenfitiger
4558
         */
4559
        public String getIdentifier() {
4560
            return identifier;
4561
        }
4562
        /**
4563
         * @param idenfitiger the idenfitiger to set
4564
         */
4565
        public void setIdentifier(String identifier) {
4566
            this.identifier = identifier;
4567
        }
4568
        /**
4569
         * @return the status
4570
         */
4571
        public String getStatus() {
4572
            if (status == null) {
4573
                return "";
4574
            }
4575
            return status;
4576
        }
4577
        /**
4578
         * @param status the status to set
4579
         */
4580
        public void setStatus(String status) {
4581
            this.status = status;
4582
        }
4583
        /**
4584
         * @return the family
4585
         */
4586
        public Taxon getFamily() {
4587
            return family;
4588
        }
4589
        /**
4590
         * @param family the family to set
4591
         */
4592
        @SuppressWarnings("rawtypes")
4593
        public void setFamily(Taxon family) {
4594
            this.family = family;
4595
            familyName = CdmBase.deproxy(family.getName());
4596
        }
4597
        /**
4598
         * @return the subfamily
4599
         */
4600
        public Taxon getSubfamily() {
4601
            return subfamily;
4602
        }
4603
        /**
4604
         * @param subfamily the subfamily to set
4605
         */
4606
        @SuppressWarnings("rawtypes")
4607
        public void setSubfamily(Taxon subfamily) {
4608
            this.subfamily = subfamily;
4609
            subfamilyName = CdmBase.deproxy(subfamily.getName());
4610
        }
4611
        /**
4612
         * @return the tribe
4613
         */
4614
        public Taxon getTribe() {
4615
            return tribe;
4616
        }
4617
        /**
4618
         * @param tribe the tribe to set
4619
         */
4620
        @SuppressWarnings("rawtypes")
4621
        public void setTribe(Taxon tribe) {
4622
            this.tribe = tribe;
4623
            tribeName = CdmBase.deproxy(tribe.getName());
4624
        }
4625
        /**
4626
         * @return the subtribe
4627
         */
4628
        public Taxon getSubtribe() {
4629
            return subtribe;
4630
        }
4631
        /**
4632
         * @param subtribe the subtribe to set
4633
         */
4634
        @SuppressWarnings("rawtypes")
4635
        public void setSubtribe(Taxon subtribe) {
4636
            this.subtribe = subtribe;
4637
            subtribeName =CdmBase.deproxy(subtribe.getName());
4638
        }
4639
        /**
4640
         * @return the genus
4641
         */
4642
        public Taxon getGenus() {
4643
            return genus;
4644
        }
4645
        /**
4646
         * @param genus the genus to set
4647
         */
4648
        @SuppressWarnings("rawtypes")
4649
        public void setGenus(Taxon genus) {
4650
            if (genus != null){
4651
	        	this.genus = genus;
4652
	            genusName = CdmBase.deproxy(genus.getName());
4653
            }
4654
        }
4655
        /**
4656
         * @return the subgenus
4657
         */
4658
        public Taxon getSubgenus() {
4659
            return subgenus;
4660
        }
4661
        /**
4662
         * @param subgenus the subgenus to set
4663
         */
4664
        @SuppressWarnings("rawtypes")
4665
        public void setSubgenus(Taxon subgenus) {
4666
            this.subgenus = subgenus;
4667
            subgenusName = CdmBase.deproxy(subgenus.getName());
4668
        }
4669
        /**
4670
         * @return the species
4671
         */
4672
        public Taxon getSpecies() {
4673
            return species;
4674
        }
4675
        /**
4676
         * @param species the species to set
4677
         */
4678
        public void setSpecies(Taxon species) {
4679
        	if (species != null){
4680
	            this.species = species;
4681
	            speciesName = CdmBase.deproxy(species.getName());
4682
        	}
4683
        }
4684
        /**
4685
         * @return the subspecies
4686
         */
4687
        public Taxon getSubspecies() {
4688
            return subspecies;
4689
        }
4690
        /**
4691
         * @param subspecies the subspecies to set
4692
         */
4693
        @SuppressWarnings("rawtypes")
4694
        public void setSubspecies(Taxon subspecies) {
4695
            this.subspecies = subspecies;
4696
            subspeciesName = CdmBase.deproxy(subspecies.getName());
4697

    
4698
        }
4699

    
4700

    
4701

    
4702
    }
4703

    
4704

    
4705
    /**
4706
     * @param status
4707
     */
4708
    private void addProblematicStatusToFile(String status) {
4709
        try{
4710
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "StatusUnknown_"+classification.getTitleCache()+".txt",true);
4711
            BufferedWriter out = new BufferedWriter(fstream);
4712
            out.write(status+"\n");
4713
            //Close the output stream
4714
            out.close();
4715
        }catch (Exception e){//Catch exception if any
4716
            System.err.println("Error: " + e.getMessage());
4717
        }
4718

    
4719
    }
4720

    
4721

    
4722

    
4723
    /**
4724
     * @param tnb
4725
     * @return
4726
     */
4727
    private Taxon findMatchingTaxon(INonViralName tnb, Reference refMods) {
4728
        logger.info("findMatchingTaxon");
4729
        Taxon tmp=null;
4730

    
4731
        refMods=CdmBase.deproxy(refMods, Reference.class);
4732
        boolean insertAsExisting =false;
4733
        List<Taxon> existingTaxa = new ArrayList<Taxon>();
4734
        try {
4735
            existingTaxa = getMatchingTaxa(tnb);
4736
        } catch (Exception e1) {
4737
            // TODO Auto-generated catch block
4738
            e1.printStackTrace();
4739
        }
4740
        double similarityScore=0.0;
4741
        double similarityAuthor=-1;
4742
        String author1="";
4743
        String author2="";
4744
        String t1="";
4745
        String t2="";
4746
        for (Taxon bestMatchingTaxon : existingTaxa){
4747
            if (!existingTaxa.isEmpty() && state2.getConfig().isInteractWithUser() && !insertAsExisting) {
4748
                //                System.out.println("tnb "+tnb.getTitleCache());
4749
                //                System.out.println("ext "+bestMatchingTaxon.getTitleCache());
4750
                try {
4751
                    if(tnb.getAuthorshipCache()!=null) {
4752
                        author1=tnb.getAuthorshipCache();
4753
                    }
4754
                } catch (Exception e) {
4755
                    // TODO Auto-generated catch block
4756
                    e.printStackTrace();
4757
                }
4758
                try {
4759
                    if(bestMatchingTaxon.getName().getAuthorshipCache()!=null) {
4760
                        author2=bestMatchingTaxon.getName().getAuthorshipCache();
4761
                    }
4762
                } catch (Exception e) {
4763
                    // TODO Auto-generated catch block
4764
                    e.printStackTrace();
4765
                }
4766
                try {
4767
                    t1=tnb.getTitleCache().split("sec.")[0].trim();
4768
                    if (author1!=null && !StringUtils.isEmpty(author1)) {
4769
                        t1=t1.split(Pattern.quote(author1))[0];
4770
                    }
4771
                } catch (Exception e) {
4772
                    // TODO Auto-generated catch block
4773
                    e.printStackTrace();
4774
                }
4775
                try {
4776
                    t2=bestMatchingTaxon.getTitleCache().split("sec.")[0].trim();
4777
                    if (author2!=null && !StringUtils.isEmpty(author2)) {
4778
                        t2=t2.split(Pattern.quote(author2))[0];
4779
                    }
4780
                } catch (Exception e) {
4781
                    // TODO Auto-generated catch block
4782
                    e.printStackTrace();
4783
                }
4784
                similarityScore=similarity(t1.trim(), t2.trim());
4785
                //                System.out.println("taxascore: "+similarityScore);
4786
                similarityAuthor=similarity(author1.trim(), author2.trim());
4787
                //                System.out.println("authorscore: "+similarityAuthor);
4788
                insertAsExisting = compareAndCheckTaxon(tnb, refMods, similarityScore, bestMatchingTaxon,similarityAuthor);
4789
            }
4790
            if(insertAsExisting) {
4791
                //System.out.println("KEEP "+bestMatchingTaxon.toString());
4792
                tmp=bestMatchingTaxon;
4793
                sourceHandler.addSource(refMods, tmp);
4794
                return tmp;
4795
            }
4796
        }
4797
        return tmp;
4798
    }
4799

    
4800

    
4801
    /**
4802
     * @param tnb
4803
     * @param refMods
4804
     * @param similarityScore
4805
     * @param bestMatchingTaxon
4806
     * @param similarityAuthor
4807
     * @return
4808
     */
4809
    private boolean compareAndCheckTaxon(INonViralName tnb, Reference refMods, double similarityScore,
4810
            Taxon bestMatchingTaxon, double similarityAuthor) {
4811
        //logger.info("compareAndCheckTaxon");
4812
        boolean insertAsExisting;
4813
        //        if (tnb.getTitleCache().split("sec.")[0].equalsIgnoreCase("Chenopodium") && bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium album")>-1) {
4814
        //            insertAsExisting=false;
4815
        //        } else{
4816
        //a small hack/automatisation for Chenopodium only
4817
        if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase("Chenopodium") &&
4818
                bestMatchingTaxon.getTitleCache().split("sec.")[0].indexOf("Chenopodium L.")>-1) {
4819
            insertAsExisting=true;
4820
        } else {
4821
            insertAsExisting=askIfReuseBestMatchingTaxon(tnb, bestMatchingTaxon, refMods, similarityScore,similarityAuthor);
4822
        }
4823
        //        }
4824

    
4825
        logDecision(tnb, bestMatchingTaxon, insertAsExisting, refMods);
4826
        return insertAsExisting;
4827
    }
4828

    
4829
    /**
4830
     * @return
4831
     */
4832
    @SuppressWarnings("rawtypes")
4833
    private List<Taxon> getMatchingTaxa(ITaxonNameBase tnb) {
4834
        //logger.info("getMatchingTaxon");
4835
    	if (tnb.getTitleCache() == null){
4836
    		tnb.setTitleCache(tnb.toString(), tnb.isProtectedTitleCache());
4837
    	}
4838

    
4839
        Pager<TaxonBase> pager=importer.getTaxonService().findByTitle(TaxonBase.class, tnb.getTitleCache().split("sec.")[0].trim(), MatchMode.BEGINNING, null, null, null, null, null);
4840
        List<TaxonBase>records = pager.getRecords();
4841

    
4842
        List<Taxon> existingTaxons = new ArrayList<Taxon>();
4843
        for (TaxonBase r:records){
4844
            try{
4845
                Taxon bestMatchingTaxon = (Taxon)r;
4846
                //                System.out.println("best: "+bestMatchingTaxon.getTitleCache());
4847
                if(compareTaxonNameLength(bestMatchingTaxon.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4848
                    existingTaxons.add(bestMatchingTaxon);
4849
                }
4850
            }catch(ClassCastException e){logger.warn("classcast exception, might be a synonym, ignore it");}
4851
        }
4852
        Taxon bmt = importer.getTaxonService().findBestMatchingTaxon(tnb.getTitleCache());
4853
        if (!existingTaxons.contains(bmt) && bmt!=null) {
4854
            if(compareTaxonNameLength(bmt.getTitleCache().split(".sec")[0],tnb.getTitleCache().split(".sec")[0])) {
4855
                existingTaxons.add(bmt);
4856
            }
4857
        }
4858
        return existingTaxons;
4859
    }
4860

    
4861
    /**
4862
     * Check if the found Taxon can reasonnably be the same
4863
     * example: with and without author should match, but the subspecies should not be suggested for a genus
4864
     * */
4865
    private boolean compareTaxonNameLength(String f, String o){
4866
        //logger.info("compareTaxonNameLength");
4867
        boolean lengthOk=false;
4868
        int sizeF = f.length();
4869
        int sizeO = o.length();
4870
        if (sizeO>=sizeF) {
4871
            lengthOk=true;
4872
        }
4873
        if(sizeF>sizeO) {
4874
            if (sizeF-sizeO>10) {
4875
                lengthOk=false;
4876
            } else {
4877
                lengthOk=true;
4878
            }
4879
        }
4880

    
4881
        //        System.out.println(lengthOk+": compare "+f+" ("+f.length()+") and "+o+" ("+o.length()+")");
4882
        return lengthOk;
4883
    }
4884

    
4885
    private double similarity(String s1, String s2) {
4886
        //logger.info("similarity");
4887
        //System.out.println("similarity *"+s1+"* vs. *"+s2+"*");
4888
        if(!StringUtils.isEmpty(s1) && !StringUtils.isEmpty(s2)){
4889
            String l1=s1.toLowerCase().trim();
4890
            String l2=s2.toLowerCase().trim();
4891
            if (l1.length() < l2.length()) { // s1 should always be bigger
4892
                String swap = l1; l1 = l2; l2 = swap;
4893
            }
4894
            int bigLen = l1.length();
4895
            if (bigLen == 0) { return 1.0; /* both strings are zero length */ }
4896
            return (bigLen - computeEditDistance(l1, l2)) / (double) bigLen;
4897
        }
4898
        else{
4899
            if(s1!=null && s2!=null){
4900
                if (s1.equalsIgnoreCase(s2)) {
4901
                    return 1;
4902
                }
4903
            }
4904
            return -1;
4905
        }
4906
    }
4907

    
4908
    private int computeEditDistance(String s1, String s2) {
4909
        //logger.info("computeEditDistance");
4910
        int[] costs = new int[s2.length() + 1];
4911
        for (int i = 0; i <= s1.length(); i++) {
4912
            int lastValue = i;
4913
            for (int j = 0; j <= s2.length(); j++) {
4914
                if (i == 0) {
4915
                    costs[j] = j;
4916
                } else {
4917
                    if (j > 0) {
4918
                        int newValue = costs[j - 1];
4919
                        if (s1.charAt(i - 1) != s2.charAt(j - 1)) {
4920
                            newValue = Math.min(Math.min(newValue, lastValue),
4921
                                    costs[j]) + 1;
4922
                        }
4923
                        costs[j - 1] = lastValue;
4924
                        lastValue = newValue;
4925
                    }
4926
                }
4927
            }
4928
            if (i > 0) {
4929
                costs[s2.length()] = lastValue;
4930
            }
4931
        }
4932
        return costs[s2.length()];
4933
    }
4934

    
4935
    // Parent taxa resolved by the handle*Hierarchy methods, keyed by the rank of the parent.
    Map<Rank, Taxon> hierarchy = new HashMap<Rank, Taxon>();
4936
    /**
4937
     * @param taxonNameBase
4938
     */
4939
    @SuppressWarnings("rawtypes")
4940
    public void lookForParentNode(INonViralName taxonNameBase, Taxon tax, Reference ref, MyName myName) {
4941
        logger.info("lookForParentNode "+taxonNameBase.getTitleCache()+" for "+myName.toString());
4942
        //System.out.println("LOOK FOR PARENT NODE "+taxonnamebase.toString()+"; "+tax.toString()+"; "+taxonnamebase.getRank());
4943
        INonViralNameParser parser = NonViralNameParserImpl.NewInstance();
4944
        if (taxonNameBase.getRank().equals(Rank.FORM())){
4945
            handleFormHierarchy(ref, myName, parser);
4946
        }
4947
        else if (taxonNameBase.getRank().equals(Rank.VARIETY())){
4948
            handleVarietyHierarchy(ref, myName, parser);
4949
        }
4950
        else if (taxonNameBase.getRank().equals(Rank.SUBSPECIES())){
4951
            handleSubSpeciesHierarchy(ref, myName, parser);
4952
        }
4953
        else if (taxonNameBase.getRank().equals(Rank.SPECIES())){
4954
            handleSpeciesHierarchy(ref, myName, parser);
4955
        }
4956
        else if (taxonNameBase.getRank().equals(Rank.SUBGENUS())){
4957
            handleSubgenusHierarchy(ref, myName, parser);
4958
        }
4959

    
4960
        if (taxonNameBase.getRank().equals(Rank.GENUS())){
4961
            handleGenusHierarchy(ref, myName, parser);
4962
        }
4963
        if (taxonNameBase.getRank().equals(Rank.SUBTRIBE())){
4964
            handleSubtribeHierarchy(ref, myName, parser);
4965
        }
4966
        if (taxonNameBase.getRank().equals(Rank.TRIBE())){
4967
            handleTribeHierarchy(ref, myName, parser);
4968
        }
4969

    
4970
        if (taxonNameBase.getRank().equals(Rank.SUBFAMILY())){
4971
            handleSubfamilyHierarchy(ref, myName, parser);
4972
        }
4973
    }
4974

    
4975
    /**
4976
     * @param ref
4977
     * @param myName
4978
     * @param parser
4979
     */
4980
    private void handleSubfamilyHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
4981
        System.out.println("handleSubfamilyHierarchy");
4982
        String parentStr = myName.getFamilyStr();
4983
        Rank r = Rank.FAMILY();
4984
        if(parentStr!=null){
4985

    
4986
            Taxon parent = null;
4987
            Pager<TaxonBase> taxontest = importer.getTaxonService().findByTitle(TaxonBase.class, parentStr, MatchMode.BEGINNING, null, null, null, null, null);
4988
            for(TaxonBase tb:taxontest.getRecords()){
4989
                try {
4990
                    if (tb.getName().getRank().equals(r)) {
4991
                        parent=CdmBase.deproxy(tb, Taxon.class);
4992
                    }
4993
                    break;
4994
                } catch (Exception e) {
4995
                    // TODO Auto-generated catch block
4996
                    e.printStackTrace();
4997
                }
4998
            }
4999
            if(parent == null) {
5000
                INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5001
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5002
                if(tmp ==null)
5003
                {
5004
                    parent=Taxon.NewInstance(parentNameName, ref);
5005
                    importer.getTaxonService().save(parent);
5006
                    parent = CdmBase.deproxy(parent, Taxon.class);
5007
                } else {
5008
                    parent=tmp;
5009
                }
5010
                lookForParentNode(parentNameName, parent, ref,myName);
5011

    
5012
            }
5013
            hierarchy.put(r,parent);
5014
        }
5015
    }
5016

    
5017
    /**
5018
     * @param ref
5019
     * @param myName
5020
     * @param parser
5021
     */
5022
    private void handleTribeHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5023
        String parentStr = myName.getSubfamilyStr();
5024
        Rank r = Rank.SUBFAMILY();
5025
        if (parentStr == null){
5026
            parentStr = myName.getFamilyStr();
5027
            r = Rank.FAMILY();
5028
        }
5029
        if(parentStr!=null){
5030
            INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5031
            Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5032
            //                    importer.getTaxonService().save(parent);
5033
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5034

    
5035
            boolean parentDoesNotExists = true;
5036
            for (TaxonNode p : classification.getAllNodes()){
5037
                if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5038
                    parentDoesNotExists = false;
5039
                    parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5040
                    break;
5041
                }
5042
            }
5043
            //                if(parentDoesNotExists) {
5044
            //                    importer.getTaxonService().save(parent);
5045
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5046
            //                    lookForParentNode(parentNameName, parent, ref,myName);
5047
            //                }
5048
            if(parentDoesNotExists) {
5049
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5050
                if(tmp ==null)
5051
                {
5052
                    parent=Taxon.NewInstance(parentNameName, ref);
5053
                    importer.getTaxonService().save(parent);
5054
                    parent = CdmBase.deproxy(parent, Taxon.class);
5055
                } else {
5056
                    parent=tmp;
5057
                }
5058
                lookForParentNode(parentNameName, parent, ref,myName);
5059

    
5060
            }
5061
            hierarchy.put(r,parent);
5062
        }
5063
    }
5064

    
5065
    /**
5066
     * @param ref
5067
     * @param myName
5068
     * @param parser
5069
     */
5070
    private void handleSubtribeHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5071
        String parentStr = myName.getTribeStr();
5072
        Rank r = Rank.TRIBE();
5073
        if (parentStr == null){
5074
            parentStr = myName.getSubfamilyStr();
5075
            r = Rank.SUBFAMILY();
5076
        }
5077
        if (parentStr == null){
5078
            parentStr = myName.getFamilyStr();
5079
            r = Rank.FAMILY();
5080
        }
5081
        if(parentStr!=null){
5082
            INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5083
            Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5084
            //                    importer.getTaxonService().save(parent);
5085
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5086

    
5087
            boolean parentDoesNotExists = true;
5088
            for (TaxonNode p : classification.getAllNodes()){
5089
                if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5090
                    parentDoesNotExists = false;
5091
                    parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5092

    
5093
                    break;
5094
                }
5095
            }
5096
            //                if(parentDoesNotExists) {
5097
            //                    importer.getTaxonService().save(parent);
5098
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5099
            //                    lookForParentNode(parentNameName, parent, ref,myName);
5100
            //                }
5101
            if(parentDoesNotExists) {
5102
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5103
                if(tmp ==null)
5104
                {
5105
                    parent=Taxon.NewInstance(parentNameName, ref);
5106
                    importer.getTaxonService().save(parent);
5107
                    parent = CdmBase.deproxy(parent, Taxon.class);
5108
                } else {
5109
                    parent=tmp;
5110
                }
5111
                lookForParentNode(parentNameName, parent, ref,myName);
5112

    
5113
            }
5114
            hierarchy.put(r,parent);
5115
        }
5116
    }
5117

    
5118
    /**
5119
     * @param ref
5120
     * @param myName
5121
     * @param parser
5122
     */
5123
    private void handleGenusHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5124
        String parentStr = myName.getSubtribeStr();
5125
        Rank r = Rank.SUBTRIBE();
5126
        if (parentStr == null){
5127
            parentStr = myName.getTribeStr();
5128
            r = Rank.TRIBE();
5129
        }
5130
        if (parentStr == null){
5131
            parentStr = myName.getSubfamilyStr();
5132
            r = Rank.SUBFAMILY();
5133
        }
5134
        if (parentStr == null){
5135
            parentStr = myName.getFamilyStr();
5136
            r = Rank.FAMILY();
5137
        }
5138
        if(parentStr!=null){
5139
            INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5140
            Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5141
            //                    importer.getTaxonService().save(parent);
5142
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5143

    
5144
            boolean parentDoesNotExist = true;
5145
            for (TaxonNode p : classification.getAllNodes()){
5146
                if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5147
                    //                        System.out.println(p.getTaxon().getUuid());
5148
                    //                        System.out.println(parent.getUuid());
5149
                    parentDoesNotExist = false;
5150
                    parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5151
                    break;
5152
                }
5153
            }
5154
            //                if(parentDoesNotExists) {
5155
            //                    importer.getTaxonService().save(parent);
5156
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5157
            //                    lookForParentNode(parentNameName, parent, ref,myName);
5158
            //                }
5159
            if(parentDoesNotExist) {
5160
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5161
                if(tmp ==null){
5162

    
5163
                    parent=Taxon.NewInstance(parentNameName, ref);
5164
                    importer.getTaxonService().save(parent);
5165
                    parent = CdmBase.deproxy(parent, Taxon.class);
5166
                } else {
5167
                    parent=tmp;
5168
                }
5169
                lookForParentNode(parentNameName, parent, ref,myName);
5170

    
5171
            }
5172
            hierarchy.put(r,parent);
5173
        }
5174
    }
5175

    
5176
    /**
5177
     * @param ref
5178
     * @param myName
5179
     * @param parser
5180
     */
5181
    private void handleSubgenusHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5182
        String parentStr = myName.getGenusStr();
5183
        Rank r = Rank.GENUS();
5184

    
5185
        if(parentStr==null){
5186
            parentStr = myName.getSubtribeStr();
5187
            r = Rank.SUBTRIBE();
5188
        }
5189
        if (parentStr == null){
5190
            parentStr = myName.getTribeStr();
5191
            r = Rank.TRIBE();
5192
        }
5193
        if (parentStr == null){
5194
            parentStr = myName.getSubfamilyStr();
5195
            r = Rank.SUBFAMILY();
5196
        }
5197
        if (parentStr == null){
5198
            parentStr = myName.getFamilyStr();
5199
            r = Rank.FAMILY();
5200
        }
5201
        if(parentStr!=null){
5202
            INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5203
            Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5204
            //                    importer.getTaxonService().save(parent);
5205
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5206

    
5207
            boolean parentDoesNotExists = true;
5208
            for (TaxonNode p : classification.getAllNodes()){
5209
                if(p.getTaxon().getTitleCache().equalsIgnoreCase(parent.getTitleCache())) {
5210
                    //                        System.out.println(p.getTaxon().getUuid());
5211
                    //                        System.out.println(parent.getUuid());
5212
                    parentDoesNotExists = false;
5213
                    parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5214
                    break;
5215
                }
5216
            }
5217
            //                if(parentDoesNotExists) {
5218
            //                    importer.getTaxonService().save(parent);
5219
            //                    parent = CdmBase.deproxy(parent, Taxon.class);
5220
            //                    lookForParentNode(parentNameName, parent, ref,myName);
5221
            //                }
5222
            if(parentDoesNotExists) {
5223
                Taxon tmp = findMatchingTaxon(parentNameName,ref);
5224
                if(tmp ==null)
5225
                {
5226
                    parent=Taxon.NewInstance(parentNameName, ref);
5227
                    importer.getTaxonService().save(parent);
5228
                    parent = CdmBase.deproxy(parent, Taxon.class);
5229
                } else {
5230
                    parent=tmp;
5231
                }
5232
                lookForParentNode(parentNameName, parent, ref,myName);
5233

    
5234
            }
5235
            hierarchy.put(r,parent);
5236
        }
5237
    }
5238

    
5239
    /**
5240
     * @param ref
5241
     * @param myName
5242
     * @param parser
5243
     */
5244
    private void handleSpeciesHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5245
        String parentStr = myName.getSubgenusStr();
5246
        Rank r = Rank.SUBGENUS();
5247

    
5248
        if(parentStr==null){
5249
            parentStr = myName.getGenusStr();
5250
            r = Rank.GENUS();
5251
        }
5252

    
5253
        if(parentStr==null){
5254
            parentStr = myName.getSubtribeStr();
5255
            r = Rank.SUBTRIBE();
5256
        }
5257
        if (parentStr == null){
5258
            parentStr = myName.getTribeStr();
5259
            r = Rank.TRIBE();
5260
        }
5261
        if (parentStr == null){
5262
            parentStr = myName.getSubfamilyStr();
5263
            r = Rank.SUBFAMILY();
5264
        }
5265
        if (parentStr == null){
5266
            parentStr = myName.getFamilyStr();
5267
            r = Rank.FAMILY();
5268
        }
5269
        if(parentStr!=null){
5270
            Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5271
            //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5272
            hierarchy.put(r,parent);
5273
        }
5274
    }
5275

    
5276
    /**
5277
     * @param ref
5278
     * @param myName
5279
     * @param parser
5280
     */
5281
    private void handleSubSpeciesHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5282
        String parentStr = myName.getSpeciesStr();
5283
        Rank r = Rank.SPECIES();
5284

    
5285

    
5286
        if(parentStr==null){
5287
            parentStr = myName.getSubgenusStr();
5288
            r = Rank.SUBGENUS();
5289
        }
5290

    
5291
        if(parentStr==null){
5292
            parentStr = myName.getGenusStr();
5293
            r = Rank.GENUS();
5294
        }
5295

    
5296
        if(parentStr==null){
5297
            parentStr = myName.getSubtribeStr();
5298
            r = Rank.SUBTRIBE();
5299
        }
5300
        if (parentStr == null){
5301
            parentStr = myName.getTribeStr();
5302
            r = Rank.TRIBE();
5303
        }
5304
        if (parentStr == null){
5305
            parentStr = myName.getSubfamilyStr();
5306
            r = Rank.SUBFAMILY();
5307
        }
5308
        if (parentStr == null){
5309
            parentStr = myName.getFamilyStr();
5310
            r = Rank.FAMILY();
5311
        }
5312
        if(parentStr!=null){
5313
            Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5314
            //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5315
            hierarchy.put(r,parent);
5316
        }
5317
    }
5318

    
5319

    
5320
    /**
5321
     * @param ref
5322
     * @param myName
5323
     * @param parser
5324
     */
5325
    private void handleFormHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5326
        String parentStr = myName.getSubspeciesStr();
5327
        Rank r = Rank.SUBSPECIES();
5328

    
5329

    
5330
        if(parentStr==null){
5331
            parentStr = myName.getSpeciesStr();
5332
            r = Rank.SPECIES();
5333
        }
5334

    
5335
        if(parentStr==null){
5336
            parentStr = myName.getSubgenusStr();
5337
            r = Rank.SUBGENUS();
5338
        }
5339

    
5340
        if(parentStr==null){
5341
            parentStr = myName.getGenusStr();
5342
            r = Rank.GENUS();
5343
        }
5344

    
5345
        if(parentStr==null){
5346
            parentStr = myName.getSubtribeStr();
5347
            r = Rank.SUBTRIBE();
5348
        }
5349
        if (parentStr == null){
5350
            parentStr = myName.getTribeStr();
5351
            r = Rank.TRIBE();
5352
        }
5353
        if (parentStr == null){
5354
            parentStr = myName.getSubfamilyStr();
5355
            r = Rank.SUBFAMILY();
5356
        }
5357
        if (parentStr == null){
5358
            parentStr = myName.getFamilyStr();
5359
            r = Rank.FAMILY();
5360
        }
5361
        if(parentStr!=null){
5362
            Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5363
            //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5364
            hierarchy.put(r,parent);
5365
        }
5366
    }
5367

    
5368
    /**
5369
     * @param ref
5370
     * @param myName
5371
     * @param parser
5372
     */
5373
    private void handleVarietyHierarchy(Reference ref, MyName myName, INonViralNameParser<?> parser) {
5374
        String parentStr = myName.getSubspeciesStr();
5375
        Rank r = Rank.SUBSPECIES();
5376

    
5377
        if(parentStr==null){
5378
            parentStr = myName.getSpeciesStr();
5379
            r = Rank.SPECIES();
5380
        }
5381

    
5382
        if(parentStr==null){
5383
            parentStr = myName.getSubgenusStr();
5384
            r = Rank.SUBGENUS();
5385
        }
5386

    
5387
        if(parentStr==null){
5388
            parentStr = myName.getGenusStr();
5389
            r = Rank.GENUS();
5390
        }
5391

    
5392
        if(parentStr==null){
5393
            parentStr = myName.getSubtribeStr();
5394
            r = Rank.SUBTRIBE();
5395
        }
5396
        if (parentStr == null){
5397
            parentStr = myName.getTribeStr();
5398
            r = Rank.TRIBE();
5399
        }
5400
        if (parentStr == null){
5401
            parentStr = myName.getSubfamilyStr();
5402
            r = Rank.SUBFAMILY();
5403
        }
5404
        if (parentStr == null){
5405
            parentStr = myName.getFamilyStr();
5406
            r = Rank.FAMILY();
5407
        }
5408
        if(parentStr!=null){
5409
            Taxon parent = handleParentName(ref, myName, parser, parentStr, r);
5410
            //System.out.println("PUT IN HIERARCHY "+r+", "+parent);
5411
            hierarchy.put(r,parent);
5412
        }
5413
    }
5414

    
5415
    /**
5416
     * @param ref
5417
     * @param myName
5418
     * @param parser
5419
     * @param parentStr
5420
     * @param r
5421
     * @return
5422
     */
5423
    private Taxon handleParentName(Reference ref, MyName myName, INonViralNameParser<?> parser, String parentStr, Rank r) {
5424
        INonViralName parentNameName =  parser.parseFullName(parentStr, nomenclaturalCode, r);
5425
        Taxon parent = Taxon.NewInstance(parentNameName, ref); //sec set null
5426
        //                    importer.getTaxonService().save(parent);
5427
        //                    parent = CdmBase.deproxy(parent, Taxon.class);
5428

    
5429
        boolean parentDoesNotExists = true;
5430
        for (TaxonNode p : classification.getAllNodes()){
5431
            if(p.getTaxon().getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(parent.getTitleCache().split("sec.")[0].trim())) {
5432
                //                        System.out.println(p.getTaxon().getUuid());
5433
                //                        System.out.println(parent.getUuid());
5434
                parentDoesNotExists = false;
5435
                parent=CdmBase.deproxy(p.getTaxon(),    Taxon.class);
5436
                break;
5437
            }
5438
        }
5439
        if(parentDoesNotExists) {
5440
            Taxon tmp = findMatchingTaxon(parentNameName,ref);
5441
            //                    System.out.println("FOUND PARENT "+tmp.toString()+" for "+parentNameName.toString());
5442
            if(tmp ==null){
5443

    
5444
                parent=Taxon.NewInstance(parentNameName, ref);
5445
                importer.getTaxonService().save(parent);
5446

    
5447
            } else {
5448
                parent=tmp;
5449
            }
5450
            lookForParentNode(parentNameName, parent, ref,myName);
5451

    
5452
        }
5453
        return parent;
5454
    }
5455

    
5456
    private void addNameDifferenceToFile(String originalname, String atomisedname){
5457
        try{
5458
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NamesDifferent_"+classification.getTitleCache()+".txt",true);
5459
            BufferedWriter out = new BufferedWriter(fstream);
5460
            out.write(originalname+" (original) versus "+replaceNull(atomisedname)+" (atomised) \n");
5461
            //Close the output stream
5462
            out.close();
5463
        }catch (Exception e){//Catch exception if any
5464
            System.err.println("Error: " + e.getMessage());
5465
        }
5466
    }
5467
    /**
5468
     * @param name
5469
     * @param author
5470
     * @param nomenclaturalCode2
5471
     * @param rank
5472
     */
5473
    private void addProblemNameToFile(String name, String author, NomenclaturalCode nomenclaturalCode2, Rank rank) {
5474
        try{
5475
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NameNotParsed.txt",true);
5476
            BufferedWriter out = new BufferedWriter(fstream);
5477
            out.write(name+"\t"+replaceNull(author)+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\n");
5478
            //Close the output stream
5479
            out.close();
5480
        }catch (Exception e){//Catch exception if any
5481
            System.err.println("Error: " + e.getMessage());
5482
        }
5483
    }
5484

    
5485

    
5486
    /**
5487
     * @param tnb
5488
     * @param bestMatchingTaxon
5489
     * @param insertAsExisting
5490
     * @param refMods
5491
     */
5492
    private void logDecision(INonViralName tnb, Taxon bestMatchingTaxon, boolean insertAsExisting, Reference refMods) {
5493
        try{
5494
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "Decisions_"+classification.toString()+".txt", true);
5495
            BufferedWriter out = new BufferedWriter(fstream);
5496
            out.write(tnb.getTitleCache() + " sec. " + refMods + "\t" + bestMatchingTaxon.getTitleCache() + "\t" + insertAsExisting + "\n");
5497
            //Close the output stream
5498
            out.close();
5499
        }catch (Exception e){//Catch exception if any
5500
            System.err.println("Error: " + e.getMessage());
5501
        }
5502
    }
5503

    
5504

    
5505
    @SuppressWarnings("unused")
5506
    private String replaceNull(Object in){
5507
        if (in == null) {
5508
            return "";
5509
        }
5510
        if (in.getClass().equals(NomenclaturalCode.class)) {
5511
            return ((NomenclaturalCode)in).getTitleCache();
5512
        }
5513
        return in.toString();
5514
    }
5515

    
5516
    /**
5517
     * @param fullName
5518
     * @param nomenclaturalCode2
5519
     * @param rank
5520
     */
5521
    private void addProblemNameToFile(String type, String name, NomenclaturalCode nomenclaturalCode2, Rank rank, String problems) {
5522
        try{
5523
            FileWriter fstream = new FileWriter(TaxonXImport.LOG_FOLDER + "NameNotParsed_"+classification.getTitleCache()+".txt",true);
5524
            BufferedWriter out = new BufferedWriter(fstream);
5525
            out.write(type+"\t"+name+"\t"+replaceNull(nomenclaturalCode2)+"\t"+replaceNull(rank)+"\t"+problems+"\n");
5526
            //Close the output stream
5527
            out.close();
5528
        }catch (Exception e){//Catch exception if any
5529
            System.err.println("Error: " + e.getMessage());
5530
        }
5531

    
5532
    }
5533

    
5534
}
5535

    
5536

    
5537

    
(8-8/9)