Project

General

Profile

Revision 8b77762f

ID 8b77762f8645c742253e132bbc78a1e6eeabd8fa
Parent ca8c03d8
Child b9b87c1f

Added by Patrick Plitzner about 6 years ago

  • refactored data structure for taxon identifications in ABCD import
    • trimming scientific names avoids creation of multiple taxa/taxon nodes
    • added check for number of taxon nodes to test

View differences:

.gitattributes
442 442
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/specimen/abcd206/in/Abcd206ImportConfigurator.java -text
443 443
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/specimen/abcd206/in/Abcd206ImportState.java -text
444 444
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/specimen/abcd206/in/Abcd206XMLFieldGetter.java -text
445
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/specimen/abcd206/in/Identification.java -text
445 446
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/specimen/excel/in/ExtensionTypeExcelImport.java -text
446 447
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/specimen/excel/in/NamedAreaLevelExcelImport.java -text
447 448
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/specimen/excel/in/NamedAreaLevellRow.java -text
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/specimen/abcd206/in/Abcd206DataHolder.java
47 47
	protected int altitude;
48 48

  
49 49

  
50
	protected List<String> identificationList;
50
	protected List<Identification> identificationList;
51 51
	protected List<SpecimenTypeDesignationStatus> statusList;
52 52
	protected List<HashMap<String, String>> atomisedIdentificationList;
53 53
	protected List<String> namedAreaList;
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/specimen/abcd206/in/Abcd206Import.java
737 737
                    break;
738 738
                }
739 739
            }
740
            dataHolder.identificationList = new ArrayList<String>();
740
            dataHolder.identificationList = new ArrayList<Identification>();
741 741
            dataHolder.statusList = new ArrayList<SpecimenTypeDesignationStatus>();
742 742
            dataHolder.atomisedIdentificationList = new ArrayList<HashMap<String, String>>();
743 743
            dataHolder.referenceList = new ArrayList<String[]>();
......
1048 1048
        indAssociation.setAssociatedSpecimenOrObservation(derivedUnitBase);
1049 1049
        indAssociation.setFeature(feature);
1050 1050

  
1051
        //<<<<<<< .courant
1052 1051
        if (state.getConfig().isInteractWithUser()){
1053 1052
            sourceMap = new HashMap<String, OriginalSourceBase<?>>();
1054 1053

  
......
1543 1542
        return taxon;
1544 1543
    }
1545 1544

  
1546

  
1547

  
1548

  
1549

  
1550

  
1551

  
1552 1545
    /**
1553 1546
     * HandleIdentifications : get the scientific names present in the ABCD
1554 1547
     * document and store link them with the observation/specimen data
......
1556 1549
     * @param derivedUnitFacade : the current derivedunitfacade
1557 1550
     */
1558 1551
    private void handleIdentifications(Abcd206ImportState state, DerivedUnitFacade derivedUnitFacade) {
1559
//        System.out.println("The reference from handleidentification "+ref);
1560 1552
        Abcd206ImportConfigurator config = state.getConfig();
1561 1553

  
1562
        String fullScientificNameString;
1563 1554
        Taxon taxon = null;
1564 1555

  
1565 1556
        String scientificName = "";
1566 1557
        boolean preferredFlag = false;
1567 1558

  
1568
        List<String> scientificNames = new ArrayList<String>();
1569 1559
        if (dataHolder.nomenclatureCode == ""){
1570 1560
            dataHolder.nomenclatureCode = config.getNomenclaturalCode().toString();
1571 1561
        }
1572 1562

  
1573 1563
        for (int i = 0; i < dataHolder.identificationList.size(); i++) {
1564
            Identification identification = dataHolder.identificationList.get(i);
1565
            scientificName = identification.getScientificName().replaceAll(" et ", " & ");
1574 1566

  
1575
            fullScientificNameString = dataHolder.identificationList.get(i);
1576
            fullScientificNameString = fullScientificNameString.replaceAll(" et ", " & ");
1577

  
1578
            if (fullScientificNameString.indexOf(PREFERRED) != -1) {
1579
                scientificName = fullScientificNameString.split(PREFERRED)[0];
1580
                String pTmp = fullScientificNameString.split(PREFERRED)[1].split(CODE)[0];
1581
                if (pTmp.equals("1") || pTmp.toLowerCase().indexOf("true") != -1) {
1582
                    preferredFlag = true;
1583
                }
1584
                else {
1585
                    preferredFlag = false;
1586
                }
1567
            String preferred = identification.getPreferred();
1568
            if (preferred.equals("1") || preferred.toLowerCase().indexOf("true") != -1 || dataHolder.identificationList.size()==1) {
1569
                preferredFlag = true;
1587 1570
            }
1588 1571
            else {
1589
                scientificName = fullScientificNameString;
1572
                preferredFlag = false;
1590 1573
            }
1591
            if(DEBUG) {
1592
                logger.info("fullscientificname " + fullScientificNameString + ", *" + dataHolder.nomenclatureCode + "*");
1593
            }
1594
            if (fullScientificNameString.indexOf(CODE) != -1) {
1595
                if (fullScientificNameString.indexOf(':') != -1) {
1596
                    dataHolder.nomenclatureCode = fullScientificNameString.split(CODE)[1].split(COLON)[1];
1597
                }
1598
                else{
1599
                    dataHolder.nomenclatureCode = fullScientificNameString.split(CODE)[1];
1600
                }
1574

  
1575
            if (identification.getCode().indexOf(':') != -1) {
1576
                dataHolder.nomenclatureCode = identification.getCode().split(COLON)[1];
1601 1577
            }
1602
            scientificNames.add(scientificName+SPLITTER+preferredFlag+SPLITTER+i);
1603
        }
1604
        for (String name:scientificNames) {
1605
            scientificName = name.split(SPLITTER)[0];
1606
            String pref = name.split(SPLITTER)[1];
1607
            String index = name.split(SPLITTER)[2];
1608
            if (pref.equalsIgnoreCase("true") || scientificNames.size()==1) {
1609
                preferredFlag = true;
1610
            } else {
1611
                preferredFlag =false;
1578
            else{
1579
                dataHolder.nomenclatureCode = identification.getCode();
1612 1580
            }
1613
            taxon = getTaxon(state, scientificName,Integer.parseInt(index),null);
1581
            taxon = getTaxon(state, scientificName, i,null);
1614 1582
            addTaxonNode(taxon, state,preferredFlag);
1615 1583
            linkDeterminationEvent(state, taxon, preferredFlag, derivedUnitFacade);
1616 1584
        }
......
1628 1596
                if(p.getTaxon().equals(taxon)) {
1629 1597
                    exist =true;
1630 1598
                }
1631
            }catch(Exception e){logger.warn("TaxonNode doesn't seem to have a taxon");}
1599
            }
1600
            catch(Exception e){
1601
                logger.warn("TaxonNode doesn't seem to have a taxon");
1602
            }
1632 1603
        }
1633 1604
        if (!exist){
1634 1605
            addParentTaxon(taxon, state, preferredFlag);
......
1641 1612
     * @param state: the ABCD import state
1642 1613
     */
1643 1614
    private void addParentTaxon(Taxon taxon, Abcd206ImportState state, boolean preferredFlag){
1644
//        System.out.println("addParentTaxon " + taxon.getTitleCache());
1645

  
1646 1615
        NonViralName<?>  nvname = CdmBase.deproxy(taxon.getName(), NonViralName.class);
1647 1616
        Rank rank = nvname.getRank();
1648 1617
        Taxon genus =null;
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/specimen/abcd206/in/Abcd206XMLFieldGetter.java
84 84
    protected void getScientificNames(NodeList group) {
85 85
        NodeList identifications, results;
86 86
        String tmpName = null;
87
        boolean nameFound = false;
88 87

  
89 88
        for (int j = 0; j < group.getLength(); j++) {
90 89
            if (group.item(j).getNodeName().equals(prefix + "Identification")) {
......
103 102
                        if (dataHolder.nomenclatureCode != null&& dataHolder.nomenclatureCode != "") {
104 103
                            // logger.info("TMP NAME P" + tmpName);
105 104

  
106
                            dataHolder.identificationList.add(tmpName+ "_preferred_"+ identifications.item(m).getTextContent()+ "_code_" + dataHolder.nomenclatureCode);
105
                            dataHolder.identificationList.add(new Identification(tmpName, identifications.item(m).getTextContent(), dataHolder.nomenclatureCode));
107 106
                        } else {
108
                            dataHolder.identificationList.add(tmpName+ "_preferred_"+ identifications.item(m).getTextContent());
107
                            dataHolder.identificationList.add(new Identification(tmpName, identifications.item(m).getTextContent()));
109 108
                        }
110 109
                        path = identifications.item(m).getNodeName();
111 110
                        // getHierarchie(identifications.item(m));
112 111
                        dataHolder.knownABCDelements.add(path);
113 112
                        path = "";
114
                        try {
115
                            dataHolder.identificationList.remove(tmpName);
116
                        } catch (Exception e) {
117
                            logger.info("ohooooooooooo:" + e);
118
                        }
119 113
                    } else if (identifications.item(m).getNodeName().equals(prefix + "References")) {
120 114
                        this.getReferences(identifications.item(m));
121 115
                    }
......
144 138
                if (!hasPref && tmpName != null) {
145 139
                    if (dataHolder.nomenclatureCode != null
146 140
                            && dataHolder.nomenclatureCode != "") {
147
                        dataHolder.identificationList.add(tmpName+ "_preferred_" + "0" + "_code_"+ dataHolder.nomenclatureCode);
141
                        dataHolder.identificationList.add(new Identification(tmpName, "0", dataHolder.nomenclatureCode));
148 142
                    } else {
149
                        dataHolder.identificationList.add(tmpName+ "_preferred_" + "0");
150
                    }
151
                    try {
152
                        dataHolder.identificationList.remove(tmpName);
153
                    } catch (Exception e) {
154
                        logger.info("ohooooooooooo:" + e);
143
                        dataHolder.identificationList.add(new Identification(tmpName, "0"));
155 144
                    }
156 145
                }
157 146
            }
......
184 173
                    }
185 174
                    if (scnames.item(n).getNodeName().equals(prefix + "NameAtomised")) {
186 175
                        try {
187
                            if (scnames.item(n).hasChildNodes()) {String tmp = scnames.item(n).getChildNodes().item(1).getNodeName();if (tmp.indexOf(prefix) != -1&& prefix.length() > 0) {
188
                                dataHolder.nomenclatureCode = tmp.split(prefix)[1];
189
                            } else {dataHolder.nomenclatureCode = scnames.item(n).getChildNodes().item(1).getNodeName();}
176
                            if (scnames.item(n).hasChildNodes()) {
177
                                String tmp = scnames.item(n).getChildNodes().item(1).getNodeName();
178
                                if (tmp.indexOf(prefix) != -1&& prefix.length() > 0) {
179
                                    dataHolder.nomenclatureCode = tmp.split(prefix)[1];
180
                                }
181
                                else {
182
                                    dataHolder.nomenclatureCode = scnames.item(n).getChildNodes().item(1).getNodeName();
183
                                }
190 184
                            }
191 185
                        } catch (Exception e) {
192 186
                            if(DEBUG) {
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/specimen/abcd206/in/Identification.java
1
// $Id$
2
/**
3
* Copyright (C) 2014 EDIT
4
* European Distributed Institute of Taxonomy
5
* http://www.e-taxonomy.eu
6
*
7
* The contents of this file are subject to the Mozilla Public License Version 1.1
8
* See LICENSE.TXT at the top of this package for the full license terms.
9
*/
10
package eu.etaxonomy.cdm.io.specimen.abcd206.in;
11

  
12
/**
 * Immutable value object describing one taxon identification read from an
 * ABCD document: the scientific name, the raw "preferred" marker and the
 * (optional) nomenclatural code.
 * <p>
 * All values are stored trimmed. Trimming the scientific name prevents the
 * import from creating several taxa/taxon nodes for names that differ only in
 * surrounding whitespace.
 *
 * @author pplitzner
 * @date Sep 16, 2014
 *
 */
public class Identification {

    private final String scientificName;
    private final String preferred;
    private final String code;

    /**
     * Creates an identification without a nomenclatural code.
     *
     * @param taxonName the scientific name, must not be <code>null</code>
     * @param preferred the raw "preferred" marker as found in the document,
     *            must not be <code>null</code>
     */
    public Identification(String taxonName, String preferred) {
        this(taxonName, preferred, null);
    }

    /**
     * Creates an identification.
     *
     * @param scientificName the scientific name, must not be <code>null</code>
     * @param preferred the raw "preferred" marker as found in the document,
     *            must not be <code>null</code>
     * @param code the nomenclatural code; may be <code>null</code>, in which
     *            case the empty string is stored
     */
    public Identification(String scientificName, String preferred, String code) {
        this.scientificName = scientificName.trim();
        this.preferred = preferred.trim();
        // The two-argument constructor passes null here. Calling trim() on it
        // unconditionally made that constructor always throw a
        // NullPointerException, so a missing code is normalised to "".
        this.code = (code == null) ? "" : code.trim();
    }

    /**
     * @return the trimmed scientific name
     */
    public String getScientificName() {
        return scientificName;
    }

    /**
     * @return the trimmed "preferred" marker
     */
    public String getPreferred() {
        return preferred;
    }

    /**
     * @return the trimmed nomenclatural code, or the empty string if none was
     *         given; never <code>null</code>
     */
    public String getCode() {
        return code;
    }

    /* (non-Javadoc)
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        return "Identification [taxonName=" + scientificName + ", preferred=" + preferred + ", code=" + code + "]";
    }

}
cdmlib-io/src/test/java/eu/etaxonomy/cdm/io/specimen/abcd206/in/SpecimenImportConfiguratorTest.java
13 13
import static org.junit.Assert.assertNotNull;
14 14
import static org.junit.Assert.assertTrue;
15 15

  
16
import java.io.FileNotFoundException;
17 16
import java.net.URISyntaxException;
18 17
import java.net.URL;
19 18

  
......
27 26
import eu.etaxonomy.cdm.api.service.ICommonService;
28 27
import eu.etaxonomy.cdm.api.service.INameService;
29 28
import eu.etaxonomy.cdm.api.service.IOccurrenceService;
29
import eu.etaxonomy.cdm.api.service.ITaxonNodeService;
30 30
import eu.etaxonomy.cdm.api.service.ITermService;
31 31
import eu.etaxonomy.cdm.io.common.CdmApplicationAwareDefaultImport;
32 32
import eu.etaxonomy.cdm.io.common.IImportConfigurator;
33 33
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
34 34
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
35
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
35 36
import eu.etaxonomy.cdm.test.integration.CdmTransactionalIntegrationTest;
36 37

  
37 38
/**
......
55 56
	@SpringBeanByType
56 57
    ICommonService commonService;
57 58

  
59
	@SpringBeanByType
60
	ITaxonNodeService taxonNodeService;
61

  
58 62

  
59 63
	private IImportConfigurator configurator;
60 64
	private IImportConfigurator configurator2;
......
121 125
        boolean result = defaultImport.invoke(configurator2);
122 126
        assertTrue("Return value for import.invoke should be true", result);
123 127
        assertEquals("Number of TaxonNames is incorrect", 2, nameService.count(TaxonNameBase.class));
128
        /*
129
         * Classification
130
         * - Cichorium
131
         *   - Cichorium calvum
132
         */
133
        assertEquals("Number of TaxonNames is incorrect", 3, taxonNodeService.count(TaxonNode.class));
124 134
/*<<<<<<< .courant
125 135
        assertEquals("Number of specimen and observation is incorrect", 10, occurrenceService.count(DerivedUnitBase.class));
126 136
        try {

Also available in: Unified diff

Add picture from clipboard (Maximum size: 40 MB)