Project

General

Profile

« Previous | Next » 

Revision cc9428e1

Added by Andreas Müller over 5 years ago

ref #7420 first version of phycobank higher classification import

View differences:

app-import/src/main/java/eu/etaxonomy/cdm/app/common/CdmDestinations.java
62 62
		return makeDestination(dbType, cdmServer, cdmDB, -1, cdmUserName, null);
63 63
	}
64 64

  
65
	public static ICdmDataSource cdm_local_test_mysql(){
65
    public static ICdmDataSource cdm_local_test_mysql(){
66
        DatabaseTypeEnum dbType = DatabaseTypeEnum.MySQL;
67
        String cdmServer = "127.0.0.1";
68
        String cdmDB = "test";
69
        String cdmUserName = "root";
70
        return makeDestination(dbType, cdmServer, cdmDB, -1, cdmUserName, null);
71
    }
72

  
73
	public static ICdmDataSource cdm_local_redlist_gefaesspflanzen(){
66 74
		DatabaseTypeEnum dbType = DatabaseTypeEnum.MySQL;
67 75
		String cdmServer = "127.0.0.1";
68 76
		String cdmDB = "rl2020_gefaesspflanzen";
app-import/src/main/java/eu/etaxonomy/cdm/app/iapt/IAPTActivator.java
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

  
10
package eu.etaxonomy.cdm.app.iapt;
11

  
12
import java.io.File;
13
import java.net.URI;
14
import java.net.URISyntaxException;
15
import java.util.UUID;
16

  
17
import org.apache.log4j.Logger;
18

  
19
import eu.etaxonomy.cdm.app.common.CdmDestinations;
20
import eu.etaxonomy.cdm.common.monitor.DefaultProgressMonitor;
21
import eu.etaxonomy.cdm.database.DatabaseTypeEnum;
22
import eu.etaxonomy.cdm.database.DbSchemaValidation;
23
import eu.etaxonomy.cdm.database.ICdmDataSource;
24
import eu.etaxonomy.cdm.io.common.CdmDefaultImport;
25
import eu.etaxonomy.cdm.io.common.IImportConfigurator.CHECK;
26
import eu.etaxonomy.cdm.io.iapt.IAPTImportConfigurator;
27
import eu.etaxonomy.cdm.model.common.VerbatimTimePeriod;
28
import eu.etaxonomy.cdm.model.reference.Reference;
29
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
30

  
31

  
32
/**
33
 * @author a.kohlbecker
34
 * @since Jul 26, 2016
35
 *
36
 */
37
public class IAPTActivator {
38
    private static final Logger logger = Logger.getLogger(IAPTActivator.class);
39

  
40
    public static final String DATA_FILE_FULL = "Registration_DB_from_BGBM17.xls";
41
    public static final String DATA_FILE_0_100 = "iapt-100.xls";
42
    public static final String DATA_ENCODING_PROBLEMS = "encoding-problems.xls";
43
    public static final String DATA_IAPT_TYPES_100 = "iapt-types-100.xls";
44
    public static final String DATA_TYPE_LEG_100 = "iapt-type-leg-100.xls";
45
    public static final String DATA_NAME_TYPES = "iapt-name-types.xls";
46
    public static final String DATA_SINGLE = "single.xls";
47
    public static final String DATA_FILE = DATA_FILE_FULL;
48

  
49
    public static final Boolean algaeOnly = false;
50

  
51
    // ====================================================================================
52

  
53
    //database validation status (create, update, validate ...)
54
    static DbSchemaValidation hbm2dll = DbSchemaValidation.CREATE;
55

  
56
    static ICdmDataSource cdmDestination = null;
57
    static {
58
        DatabaseTypeEnum dbType = DatabaseTypeEnum.MySQL;
59
        String cdmServer = "127.0.0.1";
60
        String cdmDB = "cdm_algea_registry";
61
        String cdmUserName = "edit";
62
        cdmDestination =  CdmDestinations.makeDestination(dbType, cdmServer, cdmDB, -1, cdmUserName, null);
63
        // cdmDestination = CdmDestinations.localH2();
64
    }
65

  
66
    static boolean invers = true;
67

  
68
    static boolean include = !invers;
69

  
70
    //classification
71
    static final UUID classificationUuid = UUID.fromString("8c51efb4-3d67-4bea-8f87-4bc1cba1310d");
72
    private static final String classificationName = "IAPT";
73
    static final String sourceReferenceTitle = "IAPT Import";
74

  
75
    //check - import
76
    static final CHECK check = CHECK.IMPORT_WITHOUT_CHECK;
77

  
78

  
79
    private void doImport(ICdmDataSource cdmDestination){
80

  
81
        URI source = fileURI();
82

  
83
        Reference secRef = ReferenceFactory.newDatabase();
84
        secRef.setTitle("IAPT");
85

  
86
        Reference sourceRef = ReferenceFactory.newDatabase();
87
        sourceRef.setTitle("IAPT Registration of Plant Names Database");
88
        sourceRef.setDatePublished(VerbatimTimePeriod.NewVerbatimInstance(1998, 2016));
89
        sourceRef.setOrganization("International Association for Plant Taxonomy");
90
        try {
91
            sourceRef.setUri(new URI("http://archive.bgbm.org/scripts/ASP/registration/regSearch.asp"));
92
        } catch (URISyntaxException e) {
93
            e.printStackTrace();
94
        }
95

  
96

  
97
        //make Source
98
        IAPTImportConfigurator config= IAPTImportConfigurator.NewInstance(source, cdmDestination);
99
        config.setClassificationUuid(classificationUuid);
100
        config.setClassificationName(classificationName);
101
        config.setCheck(check);
102
        config.setDbSchemaValidation(hbm2dll);
103
        config.setSourceReference(sourceRef);
104
        config.setSecReference(secRef);
105
        config.setProgressMonitor(DefaultProgressMonitor.NewInstance());
106
        config.setDoAlgeaeOnly(algaeOnly);
107
        // config.setBatchSize(100); // causes Error during managed flush [Don't change the reference to a collection with delete-orphan enabled : eu.etaxonomy.cdm.model.taxon.TaxonNode.annotations]
108

  
109
        CdmDefaultImport<IAPTImportConfigurator> myImport = new CdmDefaultImport<>();
110

  
111
        doSingleSource(fileURI(), config, myImport);
112

  
113
        System.exit(0);
114

  
115
    }
116

  
117
    /**
118
     * @param source
119
     * @param config
120
     * @param myImport
121
     */
122
    private void doSingleSource(URI source, IAPTImportConfigurator config, CdmDefaultImport<IAPTImportConfigurator> myImport) {
123
        config.setSource(source);
124
        String fileName = source.toString();
125
        fileName = fileName.substring(fileName.lastIndexOf("/") + 1 );
126

  
127
        String message = "Start import from ("+ fileName + ") ...";
128
        System.out.println(message);
129
        logger.warn(message);
130
        config.setSourceReference(getSourceReference(fileName));
131
        myImport.invoke(config);
132

  
133
        System.out.println("End import from ("+ source.toString() + ")...");
134
    }
135

  
136
    private final Reference inRef = ReferenceFactory.newGeneric();
137
    private Reference getSourceReference(String string) {
138
        Reference result = ReferenceFactory.newGeneric();
139
        result.setTitleCache(string, true);
140
        result.setInReference(inRef);
141
        inRef.setTitleCache(sourceReferenceTitle, true);
142
        return result;
143
    }
144

  
145

  
146

  
147
    public static URI fileURI() {
148
        File f = new File(System.getProperty("user.home") + "/data/Projekte/Algea Name Registry/registry/sources/IAPT/" + DATA_FILE);
149
        return f.toURI();
150
    }
151

  
152
    /**
153
     * @param args
154
     */
155
    public static void main(String[] args) {
156
        IAPTActivator me = new IAPTActivator();
157
        me.doImport(cdmDestination);
158
    }
159

  
160
}
app-import/src/main/java/eu/etaxonomy/cdm/app/phycobank/IAPTActivator.java
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

  
10
package eu.etaxonomy.cdm.app.phycobank;
11

  
12
import java.io.File;
13
import java.net.URI;
14
import java.net.URISyntaxException;
15
import java.util.UUID;
16

  
17
import org.apache.log4j.Logger;
18

  
19
import eu.etaxonomy.cdm.app.common.CdmDestinations;
20
import eu.etaxonomy.cdm.common.monitor.DefaultProgressMonitor;
21
import eu.etaxonomy.cdm.database.DatabaseTypeEnum;
22
import eu.etaxonomy.cdm.database.DbSchemaValidation;
23
import eu.etaxonomy.cdm.database.ICdmDataSource;
24
import eu.etaxonomy.cdm.io.common.CdmDefaultImport;
25
import eu.etaxonomy.cdm.io.common.IImportConfigurator.CHECK;
26
import eu.etaxonomy.cdm.io.phycobank.IAPTImportConfigurator;
27
import eu.etaxonomy.cdm.model.common.VerbatimTimePeriod;
28
import eu.etaxonomy.cdm.model.reference.Reference;
29
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
30

  
31

  
32
/**
33
 * @author a.kohlbecker
34
 * @since Jul 26, 2016
35
 *
36
 */
37
public class IAPTActivator {
38
    private static final Logger logger = Logger.getLogger(IAPTActivator.class);
39

  
40
    public static final String DATA_FILE_FULL = "Registration_DB_from_BGBM17.xls";
41
    public static final String DATA_FILE_0_100 = "iapt-100.xls";
42
    public static final String DATA_ENCODING_PROBLEMS = "encoding-problems.xls";
43
    public static final String DATA_IAPT_TYPES_100 = "iapt-types-100.xls";
44
    public static final String DATA_TYPE_LEG_100 = "iapt-type-leg-100.xls";
45
    public static final String DATA_NAME_TYPES = "iapt-name-types.xls";
46
    public static final String DATA_SINGLE = "single.xls";
47
    public static final String DATA_FILE = DATA_FILE_FULL;
48

  
49
    public static final Boolean algaeOnly = false;
50

  
51
    // ====================================================================================
52

  
53
    //database validation status (create, update, validate ...)
54
    static DbSchemaValidation hbm2dll = DbSchemaValidation.CREATE;
55

  
56
    static ICdmDataSource cdmDestination = null;
57
    static {
58
        DatabaseTypeEnum dbType = DatabaseTypeEnum.MySQL;
59
        String cdmServer = "127.0.0.1";
60
        String cdmDB = "cdm_algea_registry";
61
        String cdmUserName = "edit";
62
        cdmDestination =  CdmDestinations.makeDestination(dbType, cdmServer, cdmDB, -1, cdmUserName, null);
63
        // cdmDestination = CdmDestinations.localH2();
64
    }
65

  
66
    static boolean invers = true;
67

  
68
    static boolean include = !invers;
69

  
70
    //classification
71
    static final UUID classificationUuid = UUID.fromString("8c51efb4-3d67-4bea-8f87-4bc1cba1310d");
72
    private static final String classificationName = "IAPT";
73
    static final String sourceReferenceTitle = "IAPT Import";
74

  
75
    //check - import
76
    static final CHECK check = CHECK.IMPORT_WITHOUT_CHECK;
77

  
78

  
79
    private void doImport(ICdmDataSource cdmDestination){
80

  
81
        URI source = fileURI();
82

  
83
        Reference secRef = ReferenceFactory.newDatabase();
84
        secRef.setTitle("IAPT");
85

  
86
        Reference sourceRef = ReferenceFactory.newDatabase();
87
        sourceRef.setTitle("IAPT Registration of Plant Names Database");
88
        sourceRef.setDatePublished(VerbatimTimePeriod.NewVerbatimInstance(1998, 2016));
89
        sourceRef.setOrganization("International Association for Plant Taxonomy");
90
        try {
91
            sourceRef.setUri(new URI("http://archive.bgbm.org/scripts/ASP/registration/regSearch.asp"));
92
        } catch (URISyntaxException e) {
93
            e.printStackTrace();
94
        }
95

  
96

  
97
        //make Source
98
        IAPTImportConfigurator config= IAPTImportConfigurator.NewInstance(source, cdmDestination);
99
        config.setClassificationUuid(classificationUuid);
100
        config.setClassificationName(classificationName);
101
        config.setCheck(check);
102
        config.setDbSchemaValidation(hbm2dll);
103
        config.setSourceReference(sourceRef);
104
        config.setSecReference(secRef);
105
        config.setProgressMonitor(DefaultProgressMonitor.NewInstance());
106
        config.setDoAlgeaeOnly(algaeOnly);
107
        // config.setBatchSize(100); // causes Error during managed flush [Don't change the reference to a collection with delete-orphan enabled : eu.etaxonomy.cdm.model.taxon.TaxonNode.annotations]
108

  
109
        CdmDefaultImport<IAPTImportConfigurator> myImport = new CdmDefaultImport<>();
110

  
111
        doSingleSource(fileURI(), config, myImport);
112

  
113
        System.exit(0);
114

  
115
    }
116

  
117
    /**
118
     * @param source
119
     * @param config
120
     * @param myImport
121
     */
122
    private void doSingleSource(URI source, IAPTImportConfigurator config, CdmDefaultImport<IAPTImportConfigurator> myImport) {
123
        config.setSource(source);
124
        String fileName = source.toString();
125
        fileName = fileName.substring(fileName.lastIndexOf("/") + 1 );
126

  
127
        String message = "Start import from ("+ fileName + ") ...";
128
        System.out.println(message);
129
        logger.warn(message);
130
        config.setSourceReference(getSourceReference(fileName));
131
        myImport.invoke(config);
132

  
133
        System.out.println("End import from ("+ source.toString() + ")...");
134
    }
135

  
136
    private final Reference inRef = ReferenceFactory.newGeneric();
137
    private Reference getSourceReference(String string) {
138
        Reference result = ReferenceFactory.newGeneric();
139
        result.setTitleCache(string, true);
140
        result.setInReference(inRef);
141
        inRef.setTitleCache(sourceReferenceTitle, true);
142
        return result;
143
    }
144

  
145

  
146

  
147
    public static URI fileURI() {
148
        File f = new File(System.getProperty("user.home") + "/data/Projekte/Algea Name Registry/registry/sources/IAPT/" + DATA_FILE);
149
        return f.toURI();
150
    }
151

  
152
    /**
153
     * @param args
154
     */
155
    public static void main(String[] args) {
156
        IAPTActivator me = new IAPTActivator();
157
        me.doImport(cdmDestination);
158
    }
159

  
160
}
app-import/src/main/java/eu/etaxonomy/cdm/app/phycobank/PhycobankHigherClassificationActivator.java
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

  
10
package eu.etaxonomy.cdm.app.phycobank;
11

  
12
import java.net.URI;
13
import java.util.Date;
14
import java.util.UUID;
15

  
16
import org.apache.log4j.Logger;
17

  
18
import eu.etaxonomy.cdm.app.common.CdmDestinations;
19
import eu.etaxonomy.cdm.common.monitor.DefaultProgressMonitor;
20
import eu.etaxonomy.cdm.database.DbSchemaValidation;
21
import eu.etaxonomy.cdm.database.ICdmDataSource;
22
import eu.etaxonomy.cdm.io.common.CdmDefaultImport;
23
import eu.etaxonomy.cdm.io.common.IImportConfigurator.CHECK;
24
import eu.etaxonomy.cdm.io.phycobank.PhycobankHigherClassificationImportConfigurator;
25
import eu.etaxonomy.cdm.model.agent.Person;
26
import eu.etaxonomy.cdm.model.common.VerbatimTimePeriod;
27
import eu.etaxonomy.cdm.model.reference.Reference;
28
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
29
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
30

  
31

  
32
/**
33
 * Activator to import phycobank higher classifications.
34
 * @author a.mueller
35
 * @since 2018-08-09
36
 */
37
public class PhycobankHigherClassificationActivator {
38
    @SuppressWarnings("unused")
39
    private static final Logger logger = Logger.getLogger(PhycobankHigherClassificationActivator.class);
40

  
41

  
42
    // ====================================================================================
43

  
44
    //database validation status (create, update, validate ...)
45
    static DbSchemaValidation hbm2dll = DbSchemaValidation.VALIDATE;
46

  
47
//    static ICdmDataSource cdmDestination = CdmDestinations.localH2();
48
    static ICdmDataSource cdmDestination = CdmDestinations.cdm_local_test_mysql();
49

  
50
    //check - import
51
    static final CHECK check = CHECK.IMPORT_WITHOUT_CHECK;
52

  
53

  
54
    private void doImport(ICdmDataSource cdmDestination){
55

  
56
        URI source = fileURI();
57

  
58
        Reference sourceRef = getSourceReference();
59
//        Reference secRef = getSecReference_Frey();
60
        Reference secRef = getSecReference_WoRMS();
61

  
62
        //make Source
63
        PhycobankHigherClassificationImportConfigurator config= PhycobankHigherClassificationImportConfigurator.NewInstance(source, cdmDestination);
64
        config.setCheck(check);
65
        config.setDbSchemaValidation(hbm2dll);
66
        config.setSourceReference(sourceRef);
67
        config.setSecReference(secRef);
68
        config.setProgressMonitor(DefaultProgressMonitor.NewInstance());
69

  
70
        CdmDefaultImport<PhycobankHigherClassificationImportConfigurator> myImport = new CdmDefaultImport<>();
71
        myImport.invoke(config);
72

  
73
        System.exit(0);
74

  
75
    }
76

  
77
    private Reference getSecReference_Frey() {
78
        Reference result = ReferenceFactory.newBook();
79
        result.setTitle("Syllabus of the plant families");
80
        result.setDatePublished(VerbatimTimePeriod.NewVerbatimInstance(2015));
81
        result.setPublisher("Borntraeger");
82
        result.setPlacePublished("Stuttgart");
83
        Person author = Person.NewInstance();
84
        author.setFamilyName("Frey");
85
        author.setInitials("W.");
86
        result.setAuthorship(author);
87
        result.setUuid(UUID.fromString("2b4a3a67-e432-4d6b-b716-081045179df9"));
88
        return result;
89
    }
90

  
91
    private Reference getSecReference_WoRMS() {
92
        Reference result = ReferenceFactory.newDatabase();
93
        result.setTitle("WoRMS World Register of Marine Species");
94
        result.setDatePublished(TimePeriodParser.parseStringVerbatim("2018-04-20"));
95
        result.setUri(URI.create("http://www.marinespecies.org/index.php"));
96
        result.setUuid(UUID.fromString("b33daeb0-8770-4ee2-92d0-80aaa87bfba2"));
97
        return result;
98
    }
99

  
100
    private Reference getSourceReference() {
101
        Reference result = ReferenceFactory.newDatabase();
102
        result.setTitle("Higher classification Excel import: " + fileName());
103
        result.setUri(fileURI());
104
        result.setDatePublished(VerbatimTimePeriod.NewVerbatimInstance(new Date(), null));
105
        return result;
106
    }
107

  
108
    public static String fileName(){
109
        return "Algen_Syllabus_NormalImplied_Test.xlsx";
110
    }
111
    public static String filePath(){
112
        return "file:////BGBM-PESIHPC/Phycobank/";
113
    }
114

  
115
    public static URI fileURI() {
116
        return URI.create(filePath() + fileName());
117
    }
118

  
119
    /**
120
     * @param args
121
     */
122
    public static void main(String[] args) {
123
        PhycobankHigherClassificationActivator me = new PhycobankHigherClassificationActivator();
124
        me.doImport(cdmDestination);
125
    }
126

  
127
}
app-import/src/main/java/eu/etaxonomy/cdm/io/iapt/IAPTExcelImport.java
1
/**
2
 * Copyright (C) 2007 EDIT
3
 * European Distributed Institute of Taxonomy
4
 * http://www.e-taxonomy.eu
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9

  
10
package eu.etaxonomy.cdm.io.iapt;
11

  
12
import java.util.ArrayList;
13
import java.util.Arrays;
14
import java.util.HashMap;
15
import java.util.HashSet;
16
import java.util.List;
17
import java.util.Map;
18
import java.util.Set;
19
import java.util.UUID;
20
import java.util.regex.Matcher;
21
import java.util.regex.Pattern;
22

  
23
import org.apache.commons.lang.ArrayUtils;
24
import org.apache.commons.lang.StringEscapeUtils;
25
import org.apache.commons.lang.StringUtils;
26
import org.apache.log4j.Level;
27
import org.apache.log4j.Logger;
28
import org.joda.time.DateTimeFieldType;
29
import org.joda.time.Partial;
30
import org.joda.time.format.DateTimeFormat;
31
import org.joda.time.format.DateTimeFormatter;
32
import org.springframework.stereotype.Component;
33

  
34
import com.fasterxml.jackson.core.JsonProcessingException;
35
import com.fasterxml.jackson.databind.ObjectMapper;
36

  
37
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
38
import eu.etaxonomy.cdm.common.CdmUtils;
39
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
40
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
41
import eu.etaxonomy.cdm.model.agent.Institution;
42
import eu.etaxonomy.cdm.model.agent.Person;
43
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
44
import eu.etaxonomy.cdm.model.common.Annotation;
45
import eu.etaxonomy.cdm.model.common.AnnotationType;
46
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
47
import eu.etaxonomy.cdm.model.common.Extension;
48
import eu.etaxonomy.cdm.model.common.ExtensionType;
49
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
50
import eu.etaxonomy.cdm.model.common.Language;
51
import eu.etaxonomy.cdm.model.common.LanguageString;
52
import eu.etaxonomy.cdm.model.common.Marker;
53
import eu.etaxonomy.cdm.model.common.MarkerType;
54
import eu.etaxonomy.cdm.model.common.OriginalSourceType;
55
import eu.etaxonomy.cdm.model.common.VerbatimTimePeriod;
56
import eu.etaxonomy.cdm.model.name.IBotanicalName;
57
import eu.etaxonomy.cdm.model.name.NameRelationshipType;
58
import eu.etaxonomy.cdm.model.name.NameTypeDesignation;
59
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
60
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
61
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
62
import eu.etaxonomy.cdm.model.name.Rank;
63
import eu.etaxonomy.cdm.model.name.RankClass;
64
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
65
import eu.etaxonomy.cdm.model.name.TaxonName;
66
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
67
import eu.etaxonomy.cdm.model.occurrence.Collection;
68
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
69
import eu.etaxonomy.cdm.model.occurrence.FieldUnit;
70
import eu.etaxonomy.cdm.model.occurrence.GatheringEvent;
71
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
72
import eu.etaxonomy.cdm.model.reference.Reference;
73
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
74
import eu.etaxonomy.cdm.model.taxon.Classification;
75
import eu.etaxonomy.cdm.model.taxon.ITaxonTreeNode;
76
import eu.etaxonomy.cdm.model.taxon.Synonym;
77
import eu.etaxonomy.cdm.model.taxon.SynonymType;
78
import eu.etaxonomy.cdm.model.taxon.Taxon;
79
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
80
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
81

  
82
/**
83
 * @author a.mueller
84
 * @since 05.01.2016
85
 */
86

  
87
@Component("iAPTExcelImport")
88
public class IAPTExcelImport<CONFIG extends IAPTImportConfigurator> extends SimpleExcelTaxonImport<CONFIG> {
89
    private static final long serialVersionUID = -747486709409732371L;
90
    private static final Logger logger = Logger.getLogger(IAPTExcelImport.class);
91
    public static final String ANNOTATION_MARKER_STRING = "[*]";
92

  
93

  
94
    private static UUID ROOT_UUID = UUID.fromString("4137fd2a-20f6-4e70-80b9-f296daf51d82");
95

  
96
    private static NonViralNameParserImpl nameParser = NonViralNameParserImpl.NewInstance();
97

  
98
    private final static String REGISTRATIONNO_PK= "RegistrationNo_Pk";
99
    private final static String HIGHERTAXON= "HigherTaxon";
100
    private final static String FULLNAME= "FullName";
101
    private final static String AUTHORSSPELLING= "AuthorsSpelling";
102
    private final static String LITSTRING= "LitString";
103
    private final static String REGISTRATION= "Registration";
104
    private final static String TYPE= "Type";
105
    private final static String CAVEATS= "Caveats";
106
    private final static String FULLBASIONYM= "FullBasionym";
107
    private final static String FULLSYNSUBST= "FullSynSubst";
108
    private final static String NOTESTXT= "NotesTxt";
109
    private final static String REGDATE= "RegDate";
110
    private final static String NAMESTRING= "NameString";
111
    private final static String BASIONYMSTRING= "BasionymString";
112
    private final static String SYNSUBSTSTR= "SynSubstStr";
113
    private final static String AUTHORSTRING= "AuthorString";
114

  
115
    private  static List<String> expectedKeys= Arrays.asList(new String[]{
116
            REGISTRATIONNO_PK, HIGHERTAXON, FULLNAME, AUTHORSSPELLING, LITSTRING, REGISTRATION, TYPE, CAVEATS, FULLBASIONYM, FULLSYNSUBST, NOTESTXT, REGDATE, NAMESTRING, BASIONYMSTRING, SYNSUBSTSTR, AUTHORSTRING});
117

  
118
    private static final Pattern nomRefTokenizeP = Pattern.compile("^(?<title>.*):\\s(?<detail>[^\\.:]+)\\.(?<date>.*?)(?:\\s\\((?<issue>[^\\)]*)\\)\\s*)?\\.?$");
119
    private static final Pattern[] datePatterns = new Pattern[]{
120
            // NOTE:
121
            // The order of the patterns is extremely important!!!
122
            //
123
            // all patterns cover the years 1700 - 1999
124
            Pattern.compile("^(?<year>1[7,8,9][0-9]{2})$"), // only year, like '1969'
125
            Pattern.compile("^(?<monthName>\\p{L}+\\.?)\\s(?<day>[0-9]{1,2})(?:st|rd|th)?\\.?,?\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like April 12, 1969 or april 12th 1999
126
            Pattern.compile("^(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // April 99 or April, 1999 or Apr. 12
127
            Pattern.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(\\s?)(?<month>[0-1]?[0-9])\\2\\3(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12.04.1969 or 12. 04. 1969 or 12/04/1969 or 12-04-1969
128
            Pattern.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(?<monthName>[IVX]{1,2})\\2(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12-VI-1969
129
            Pattern.compile("^(?:(?<day>[0-9]{1,2})(?:\\sde)?\\s)?(?<monthName>\\p{L}+)(?:\\sde)?\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full and partial date like 12 de Enero de 1999 or Enero de 1999
130
            Pattern.compile("^(?<month>[0-1]?[0-9])([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like 04.1969 or 04/1969 or 04-1969
131
            Pattern.compile("^(?<year>(?:1[7,8,9])?[0-9]{2})([\\.\\-/])(?<month>[0-1]?[0-9])$"),//  partial date like 1999-04
132
            Pattern.compile("^(?<monthName>[IVX]{1,2})([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like VI-1969
133
            Pattern.compile("^(?<day>[0-9]{1,2})(?:[\\./]|th|rd|st)?\\s(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12. April 1969 or april 1999 or 22 Dec.1999
134
        };
135
    protected static final Pattern typeSpecimenSplitPattern =  Pattern.compile("^(?:\"*[Tt]ype: (?<fieldUnit>.*?))(?:[Hh]olotype:(?<holotype>.*?)\\.?)?(?:[Ii]sotype[^:]*:(?<isotype>.*)\\.?)?\\.?$");
136

  
137
    private static final Pattern typeNameBasionymPattern =  Pattern.compile("\\([Bb]asionym\\s?\\:\\s?(?<basionymName>[^\\)]*).*$");
138
    private static final Pattern typeNameNotePattern =  Pattern.compile("\\[([^\\[]*)"); // matches the inner of '[...]'
139
    // Splits the special "not to be indicated ..." type strings into note, agent and name.
    // The last group must be a *named* group: it is read via group("name"), which throws an
    // IllegalArgumentException when the pattern defines no group of that name.
    // NOTE(review): the 'agent' group is empty, so ':' must be directly followed by ';' for a match - confirm intent.
    private static final Pattern typeNameSpecialSplitPattern =  Pattern.compile("(?<note>.*\\;.*?)\\:(?<agent>)\\;(?<name>.*)");
140

  
141
    protected static final Pattern collectorPattern =  Pattern.compile(".*?(?<fullStr1>\\([Ll]eg\\.\\s+(?<data1>[^\\)]*)\\)).*$|.*?(?<fullStr2>\\s[Ll]eg\\.\\:?\\s+(?<data2>.*?)\\.?)$|^(?<fullStr3>[Ll]eg\\.\\:?\\s+(?<data3>.*?)\\.?)");
142
    private static final Pattern collectionDataPattern =  Pattern.compile("^(?<collector>[^,]*),\\s?(?<detail>.*?)\\.?$");
143
    private static final Pattern collectorsNumber =  Pattern.compile("^([nN]o\\.\\s.*)$");
144

  
145
    // AccessionNumbers: , #.*, n°:?, 96/3293, No..*, -?\w{1,3}-[0-9\-/]*
146
    private static final Pattern accessionNumberOnlyPattern = Pattern.compile("^(?<accNumber>(?:n°\\:?\\s?|#|No\\.?\\s?)?[\\d\\w\\-/]*)$");
147

  
148
    private static final Pattern[] specimenTypePatterns = new Pattern[]{
149
            Pattern.compile("^(?<colCode>[A-Z]+|CPC Micropaleontology Lab\\.?)\\s+(?:\\((?<institute>.*[^\\)])\\))(?<accNumber>.*)?$"), // like: GAUF (Gansu Agricultural University) No. 1207-1222
150
            Pattern.compile("^(?<colCode>[A-Z]+|CPC Micropaleontology Lab\\.?)\\s+(?:Coll\\.\\s(?<subCollection>[^\\.,;]*)(.))(?<accNumber>.*)?$"), // like KASSEL Coll. Krasske, Praep. DII 78
151
            Pattern.compile("^(?:in\\s)?(?<institute>[Cc]oll\\.\\s.*?)(?:\\s+(?<accNumber>(Praep\\.|slide|No\\.|Inv\\. Nr\\.|Nr\\.).*))?$"), // like Coll. Lange-Bertalot, Bot. Inst., Univ. Frankfurt/Main, Germany Praep. Neukaledonien OTL 62
152
            Pattern.compile("^(?<institute>Inst\\.\\s.*?)\\s+(?<accNumber>N\\s.*)?$"), // like Inst. Geological Sciences, Acad. Sci. Belarus, Minsk N 212 A
153
            Pattern.compile("^(?<colCode>[A-Z]+)(?:\\s+(?<accNumber>.*))?$"), // identifies the Collection code and takes the rest as accessionNumber if any
154
    };
155

  
156

  
157
    // Matches registration strings like:
    //   Registration date: 29.06.98; no.: 2922; office: Berlin.
    // optionally followed by a form number:
    //   Registration date: 29.06.98; no.: 2922; office: Berlin. [Form no.: 12]
    private static final Pattern registrationPattern = Pattern.compile("^Registration date\\:\\s(?<regdate>\\d\\d\\.\\d\\d\\.\\d\\d); no\\.\\:\\s(?<regid>\\d+);\\soffice\\:\\s(?<office>.*?)\\.(?:\\s\\[Form no\\.\\:\\s(?<formNo>\\d+)\\])?$");
158

  
159
    private static Map<String, Integer> monthFromNameMap = new HashMap<>();
160

  
161
    static {
162
        String[] ck = new String[]{"leden", "únor", "březen", "duben", "květen", "červen", "červenec ", "srpen", "září", "říjen", "listopad", "prosinec"};
163
        String[] fr = new String[]{"janvier", "février", "mars", "avril", "mai", "juin", "juillet", "août", "septembre", "octobre", "novembre", "décembre"};
164
        String[] de = new String[]{"januar", "februar", "märz", "april", "mai", "juni", "juli", "august", "september", "oktober", "november", "dezember"};
165
        String[] en = new String[]{"january", "february", "march", "april", "may", "june", "july", "august", "september", "october", "november", "december"};
166
        String[] it = new String[]{"gennaio", "febbraio", "marzo", "aprile", "maggio", "giugno", "luglio", "agosto", "settembre", "ottobre", "novembre", "dicembre"};
167
        String[] sp = new String[]{"enero", "febrero", "marzo", "abril", "mayo", "junio", "julio", "agosto", "septiembre", "octubre", "noviembre", "diciembre"};
168
        String[] de_abbrev = new String[]{"jan.", "feb.", "märz", "apr.", "mai", "jun.", "jul.", "aug.", "sept.", "okt.", "nov.", "dez."};
169
        String[] en_abbrev = new String[]{"jan.", "feb.", "mar.", "apr.", "may", "jun.", "jul.", "aug.", "sep.", "oct.", "nov.", "dec."};
170
        String[] port = new String[]{"Janeiro", "Fevereiro", "Março", "Abril", "Maio", "Junho", "Julho", "Agosto", "Setembro", "Outubro", "Novembro", "Dezembro"};
171
        String[] rom_num = new String[]{"i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii"};
172

  
173
        String[][] perLang =  new String[][]{ck, de, fr, en, it, sp, port, de_abbrev, en_abbrev, rom_num};
174

  
175
        for (String[] months: perLang) {
176
            for(int m = 1; m < 13; m++){
177
                monthFromNameMap.put(months[m - 1].toLowerCase(), m);
178
            }
179
        }
180

  
181
        // special cases
182
        monthFromNameMap.put("mar", 3);
183
        monthFromNameMap.put("dec", 12);
184
        monthFromNameMap.put("februari", 2);
185
        monthFromNameMap.put("març", 3);
186
    }
187

  
188

  
189
    DateTimeFormatter formatterYear = DateTimeFormat.forPattern("yyyy");
190

  
191
    private Map<String, Collection> collectionMap = new HashMap<>();
192

  
193
    private ExtensionType extensionTypeIAPTRegData = null;
194

  
195
    private Set<String> nameSet = new HashSet<>();
196
    private DefinedTermBase duplicateRegistration = null;
197

  
198
    enum TypesName {
199
        fieldUnit, holotype, isotype;
200

  
201
        public SpecimenTypeDesignationStatus status(){
202
            switch (this) {
203
                case holotype:
204
                    return SpecimenTypeDesignationStatus.HOLOTYPE();
205
                case isotype:
206
                    return SpecimenTypeDesignationStatus.ISOTYPE();
207
                default:
208
                    return null;
209
            }
210
        }
211
    }
212

  
213
    private MarkerType markerTypeFossil = null;
214
    private Rank rankUnrankedSupraGeneric = null;
215
    private Rank familyIncertisSedis = null;
216
    private AnnotationType annotationTypeCaveats = null;
217

  
218
    private Reference bookVariedadesTradicionales = null;
219

  
220
    /**
221
     * HACK for unit simple testing
222
     */
223
    boolean _testMode = System.getProperty("TEST_MODE") != null;
224

  
225
    private Taxon makeTaxon(HashMap<String, String> record, SimpleExcelTaxonImportState<CONFIG> state,
226
                            TaxonNode higherTaxonNode, boolean isFossil) {
227

  
228
        String regNumber = getValue(record, REGISTRATIONNO_PK, false);
229
        String regStr = getValue(record, REGISTRATION, true);
230
        String titleCacheStr = getValue(record, FULLNAME, true);
231
        String nameStr = getValue(record, NAMESTRING, true);
232
        String authorStr = getValue(record, AUTHORSTRING, true);
233
        String nomRefStr = getValue(record, LITSTRING, true);
234
        String authorsSpelling = getValue(record, AUTHORSSPELLING, true);
235
        String notesTxt = getValue(record, NOTESTXT, true);
236
        String caveats = getValue(record, CAVEATS, true);
237
        String fullSynSubstStr = getValue(record, FULLSYNSUBST, true);
238
        String fullBasionymStr = getValue(record, FULLBASIONYM, true);
239
        String basionymNameStr = getValue(record, FULLBASIONYM, true);
240
        String synSubstStr = getValue(record, SYNSUBSTSTR, true);
241
        String typeStr = getValue(record, TYPE, true);
242

  
243

  
244
        String nomRefTitle = null;
245
        String nomRefDetail;
246
        String nomRefPupDate = null;
247
        String nomRefIssue = null;
248
        Partial pupDate = null;
249

  
250
        boolean restoreOriginalReference = false;
251
        boolean nameIsValid = true;
252

  
253
        // preprocess nomRef: separate citation, reference detail, publishing date
254
        if(!StringUtils.isEmpty(nomRefStr)){
255
            nomRefStr = nomRefStr.trim();
256

  
257
            // handle the special case which is hard to parse:
258
            //
259
            // Las variedades tradicionales de frutales de la Cuenca del Río Segura. Catálogo Etnobotánico (1): Frutos secos, oleaginosos, frutales de hueso, almendros y frutales de pepita: 154. 1997.
260
            if(nomRefStr.startsWith("Las variedades tradicionales de frutales ")){
261

  
262
                if(bookVariedadesTradicionales == null){
263
                    bookVariedadesTradicionales = ReferenceFactory.newBook();
264
                    bookVariedadesTradicionales.setTitle("Las variedades tradicionales de frutales de la Cuenca del Río Segura. Catálogo Etnobotánico (1): Frutos secos, oleaginosos, frutales de hueso, almendros y frutales de pepita");
265
                    bookVariedadesTradicionales.setDatePublished(VerbatimTimePeriod.NewVerbatimInstance(1997));
266
                    getReferenceService().save(bookVariedadesTradicionales);
267
                }
268
                nomRefStr = nomRefStr.replaceAll("^.*?\\:.*?\\:", "Las variedades tradicionales:");
269
                restoreOriginalReference = true;
270
            }
271

  
272
            Matcher m = nomRefTokenizeP.matcher(nomRefStr);
273
            if(m.matches()){
274
                nomRefTitle = m.group("title");
275
                nomRefDetail = m.group("detail");
276
                nomRefPupDate = m.group("date").trim();
277
                nomRefIssue = m.group("issue");
278

  
279
                pupDate = parseDate(regNumber, nomRefPupDate);
280
                if (pupDate != null) {
281
                    nomRefTitle = nomRefTitle + ": " + nomRefDetail + ". " + pupDate.toString(formatterYear) + ".";
282
                } else {
283
                    logger.warn(csvReportLine(regNumber, "Pub date", nomRefPupDate, "in", nomRefStr, "not parsable"));
284
                }
285
            } else {
286
                nomRefTitle = nomRefStr;
287
            }
288
        }
289

  
290
        TaxonName taxonName = makeBotanicalName(state, regNumber, titleCacheStr, nameStr, authorStr, nomRefTitle);
291

  
292
        // always add the original strings of parsed data as annotation
293
        taxonName.addAnnotation(Annotation.NewInstance("imported and parsed data strings:" +
294
                        "\n -  '" + LITSTRING + "': "+ nomRefStr +
295
                        "\n -  '" + TYPE + "': " + typeStr +
296
                        "\n -  '" + REGISTRATION  + "': " + regStr
297
                , AnnotationType.TECHNICAL(), Language.DEFAULT()));
298

  
299
        if(restoreOriginalReference){
300
            taxonName.setNomenclaturalReference(bookVariedadesTradicionales);
301
        }
302

  
303
        if(taxonName.getNomenclaturalReference() != null){
304
            if(pupDate != null) {
305
                taxonName.getNomenclaturalReference().setDatePublished(VerbatimTimePeriod.NewVerbatimInstance(pupDate));
306
            }
307
            if(nomRefIssue != null) {
308
                taxonName.getNomenclaturalReference().setVolume(nomRefIssue);
309
            }
310
        }
311

  
312

  
313
        if(!StringUtils.isEmpty(notesTxt)){
314
            notesTxt = notesTxt.replace("Notes: ", "").trim();
315
            taxonName.addAnnotation(Annotation.NewInstance(notesTxt, AnnotationType.EDITORIAL(), Language.DEFAULT()));
316
            nameIsValid = false;
317

  
318
        }
319
        if(!StringUtils.isEmpty(caveats)){
320
            caveats = caveats.replace("Caveats: ", "").trim();
321
            taxonName.addAnnotation(Annotation.NewInstance(caveats, annotationTypeCaveats(), Language.DEFAULT()));
322
            nameIsValid = false;
323
        }
324

  
325
        if(nameIsValid){
326
            // Status is always considered valid if no notes and cavets are set
327
            taxonName.addStatus(NomenclaturalStatus.NewInstance(NomenclaturalStatusType.VALID()));
328
        }
329

  
330
        getNameService().save(taxonName);
331

  
332
        // Namerelations
333
        if(!StringUtils.isEmpty(authorsSpelling)){
334
            authorsSpelling = authorsSpelling.replaceFirst("Author's spelling:", "").replaceAll("\"", "").trim();
335

  
336
            String[] authorSpellingTokens = StringUtils.split(authorsSpelling, " ");
337
            String[] nameStrTokens = StringUtils.split(nameStr, " ");
338

  
339
            ArrayUtils.reverse(authorSpellingTokens);
340
            ArrayUtils.reverse(nameStrTokens);
341

  
342
            for (int i = 0; i < nameStrTokens.length; i++){
343
                if(i < authorSpellingTokens.length){
344
                    nameStrTokens[i] = authorSpellingTokens[i];
345
                }
346
            }
347
            ArrayUtils.reverse(nameStrTokens);
348

  
349
            String misspelledNameStr = StringUtils.join (nameStrTokens, ' ');
350
            // build the fullnameString of the misspelled name
351
            misspelledNameStr = taxonName.getTitleCache().replace(nameStr, misspelledNameStr);
352

  
353
            TaxonName misspelledName = nameParser.parseReferencedName(misspelledNameStr, NomenclaturalCode.ICNAFP, null);
354
            misspelledName.addRelationshipToName(taxonName, NameRelationshipType.MISSPELLING(), null);
355
            getNameService().save(misspelledName);
356
        }
357

  
358
        // Replaced Synonyms
359
        if(!StringUtils.isEmpty(fullSynSubstStr)){
360
            fullSynSubstStr = fullSynSubstStr.replace("Syn. subst.: ", "");
361
            TaxonName replacedSynonymName = makeBotanicalName(state, regNumber, fullSynSubstStr, synSubstStr, null, null);
362
            replacedSynonymName.addReplacedSynonym(taxonName, null, null, null);
363
            getNameService().save(replacedSynonymName);
364
        }
365

  
366
        Reference sec = state.getConfig().getSecReference();
367
        Taxon taxon = Taxon.NewInstance(taxonName, sec);
368

  
369
        // Basionym
370
        if(fullBasionymStr != null){
371
            fullBasionymStr = fullBasionymStr.replaceAll("^\\w*:\\s", ""); // Strip off the leading 'Basionym: "
372
            basionymNameStr = basionymNameStr.replaceAll("^\\w*:\\s", ""); // Strip off the leading 'Basionym: "
373
            TaxonName basionym = makeBotanicalName(state, regNumber, fullBasionymStr, basionymNameStr, null, null);
374
            getNameService().save(basionym);
375
            taxonName.addBasionym(basionym);
376

  
377
            Synonym syn = Synonym.NewInstance(basionym, sec);
378
            taxon.addSynonym(syn, SynonymType.HOMOTYPIC_SYNONYM_OF());
379
            getTaxonService().save(syn);
380
        }
381

  
382
        // Markers
383
        if(isFossil){
384
            taxon.addMarker(Marker.NewInstance(markerTypeFossil(), true));
385
        }
386
        if(!nameSet.add(titleCacheStr)){
387
            taxonName.addMarker(Marker.NewInstance(markerDuplicateRegistration(), true));
388
            logger.warn(csvReportLine(regNumber, "Duplicate registration of", titleCacheStr));
389
        }
390

  
391

  
392
        // Types
393
        if(!StringUtils.isEmpty(typeStr)){
394

  
395
            if(taxonName.getRank().isSpecies() || taxonName.getRank().isLower(Rank.SPECIES())) {
396
                makeSpecimenTypeData(typeStr, taxonName, regNumber, state, false);
397
            } else {
398
                makeNameTypeData(typeStr, taxonName, regNumber, state);
399
            }
400
        }
401

  
402
        getTaxonService().save(taxon);
403

  
404
        if(taxonName.getRank().equals(Rank.SPECIES()) || taxonName.getRank().isLower(Rank.SPECIES())){
405
            // try to find the genus, it should have been imported already, Genera are coming first in the import file
406
            Taxon genus = ((IAPTImportState)state).getGenusTaxonMap().get(taxonName.getGenusOrUninomial());
407
            if(genus != null){
408
                higherTaxonNode = genus.getTaxonNodes().iterator().next();
409
            } else {
410
                logger.info(csvReportLine(regNumber, "Parent genus not found for", nameStr));
411
            }
412
        }
413

  
414
        if(higherTaxonNode != null){
415
            higherTaxonNode.addChildTaxon(taxon, null, null);
416
            getTaxonNodeService().save(higherTaxonNode);
417
        }
418

  
419
        if(taxonName.getRank().isGenus()){
420
            ((IAPTImportState)state).getGenusTaxonMap().put(taxonName.getGenusOrUninomial(), taxon);
421
        }
422

  
423
        return taxon;
424
    }
425

  
426
    private void makeSpecimenTypeData(String typeStr, TaxonName taxonName, String regNumber, SimpleExcelTaxonImportState<CONFIG> state, boolean isFossil) {
427

  
428
        Matcher m = typeSpecimenSplitPattern.matcher(typeStr);
429

  
430
        if(m.matches()){
431
            String fieldUnitStr = m.group(TypesName.fieldUnit.name());
432
            // boolean isFieldUnit = typeStr.matches(".*([°']|\\d+\\s?m\\s|\\d+\\s?km\\s).*"); // check for location or unit m, km // makes no sense!!!!
433
            FieldUnit fieldUnit = parseFieldUnit(fieldUnitStr, regNumber, state);
434
            if(fieldUnit == null) {
435
                // create a field unit with only a titleCache using the fieldUnitStr substring
436
                logger.warn(csvReportLine(regNumber, "Type: fieldUnitStr can not be parsed", fieldUnitStr));
437
                fieldUnit = FieldUnit.NewInstance();
438
                fieldUnit.setTitleCache(fieldUnitStr, true);
439
                getOccurrenceService().save(fieldUnit);
440
            }
441
            getOccurrenceService().save(fieldUnit);
442

  
443
            SpecimenOrObservationType specimenType;
444
            if(isFossil){
445
                specimenType = SpecimenOrObservationType.Fossil;
446
            } else {
447
                specimenType = SpecimenOrObservationType.PreservedSpecimen;
448
            }
449

  
450
            // all others ..
451
            addSpecimenTypes(taxonName, fieldUnit, m.group(TypesName.holotype.name()), TypesName.holotype, false, regNumber, specimenType);
452
            addSpecimenTypes(taxonName, fieldUnit, m.group(TypesName.isotype.name()), TypesName.isotype, true, regNumber, specimenType);
453

  
454
        } else {
455
            // create a field unit with only a titleCache using the full typeStr
456
            FieldUnit fieldUnit = FieldUnit.NewInstance();
457
            fieldUnit.setTitleCache(typeStr, true);
458
            getOccurrenceService().save(fieldUnit);
459
            logger.warn(csvReportLine(regNumber, "Type: field 'Type' can not be parsed", typeStr));
460
        }
461
        getNameService().save(taxonName);
462
    }
463

  
464
    private void makeNameTypeData(String typeStr, IBotanicalName taxonName, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) {
465

  
466
        String nameStr = typeStr.replaceAll("^Type\\s?\\:\\s?", "");
467
        if(nameStr.isEmpty()) {
468
            return;
469
        }
470

  
471
        String basionymNameStr = null;
472
        String noteStr = null;
473
        String agentStr = null;
474

  
475
        Matcher m;
476

  
477
        if(typeStr.startsWith("not to be indicated")){
478
            // Special case:
479
            // Type: not to be indicated (Art. H.9.1. Tokyo Code); stated parent genera: Hechtia Klotzsch; Deuterocohnia Mez
480
            // FIXME
481
            m = typeNameSpecialSplitPattern.matcher(nameStr);
482
            if(m.matches()){
483
                nameStr = m.group("name");
484
                noteStr = m.group("note");
485
                agentStr = m.group("agent");
486
                // TODO better import of agent?
487
                if(agentStr != null){
488
                    noteStr = noteStr + ": " + agentStr;
489
                }
490
            }
491
        } else {
492
            // Generic case
493
            m = typeNameBasionymPattern.matcher(nameStr);
494
            if (m.find()) {
495
                basionymNameStr = m.group("basionymName");
496
                if (basionymNameStr != null) {
497
                    nameStr = nameStr.replace(m.group(0), "");
498
                }
499
            }
500

  
501
            m = typeNameNotePattern.matcher(nameStr);
502
            if (m.find()) {
503
                noteStr = m.group(1);
504
                if (noteStr != null) {
505
                    nameStr = nameStr.replace(m.group(0), "");
506
                }
507
            }
508
        }
509

  
510
        TaxonName typeName = (TaxonName) nameParser.parseFullName(nameStr, NomenclaturalCode.ICNAFP, null);
511

  
512
        if(typeName.isProtectedTitleCache() || typeName.getNomenclaturalReference() != null && typeName.getNomenclaturalReference().isProtectedTitleCache()) {
513
            logger.warn(csvReportLine(regNumber, "NameType not parsable", typeStr, nameStr));
514
        }
515

  
516
        if(basionymNameStr != null){
517
            TaxonName basionymName = (TaxonName) nameParser.parseFullName(nameStr, NomenclaturalCode.ICNAFP, null);
518
            getNameService().save(basionymName);
519
            typeName.addBasionym(basionymName);
520
        }
521

  
522

  
523
        NameTypeDesignation nameTypeDesignation = NameTypeDesignation.NewInstance();
524
        nameTypeDesignation.setTypeName(typeName);
525
        getNameService().save(typeName);
526

  
527
        if(noteStr != null){
528
            nameTypeDesignation.addAnnotation(Annotation.NewInstance(noteStr, AnnotationType.EDITORIAL(), Language.UNKNOWN_LANGUAGE()));
529
        }
530
        taxonName.addNameTypeDesignation(typeName, null, null, null, null, false);
531

  
532
    }
533

  
534
    /**
535
     * Currently only parses the collector, fieldNumber and the collection date.
536
     *
537
     * @param fieldUnitStr
538
     * @param regNumber
539
     * @param state
540
     * @return null if the fieldUnitStr could not be parsed
541
     */
542
    protected FieldUnit parseFieldUnit(String fieldUnitStr, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) {
543

  
544
        FieldUnit fieldUnit = null;
545

  
546
        Matcher m1 = collectorPattern.matcher(fieldUnitStr);
547
        if(m1.matches()){
548

  
549
            String collectorData = m1.group(2); // like ... (leg. Metzeltin, 30. 9. 1996)
550
            String removal = m1.group(1);
551
            if(collectorData == null){
552
                collectorData = m1.group(4); // like ... leg. Metzeltin, 30. 9. 1996
553
                removal = m1.group(3);
554
            }
555
            if(collectorData == null){
556
                collectorData = m1.group(6); // like ^leg. J. J. Halda 18.3.1997$
557
                removal = null;
558
            }
559
            if(collectorData == null){
560
                return null;
561
            }
562

  
563
            // the fieldUnitStr is parsable
564
            // remove all collectorData from the fieldUnitStr and use the rest as locality
565
            String locality = null;
566
            if(removal != null){
567
                locality = fieldUnitStr.replace(removal, "");
568
            }
569

  
570
            String collectorStr = null;
571
            String detailStr = null;
572
            Partial date = null;
573
            String fieldNumber = null;
574

  
575
            Matcher m2 = collectionDataPattern.matcher(collectorData);
576
            if(m2.matches()){
577
                collectorStr = m2.group("collector");
578
                detailStr = m2.group("detail");
579

  
580
                // Try to make sense of the detailStr
581
                if(detailStr != null){
582
                    detailStr = detailStr.trim();
583
                    // 1. try to parse as date
584
                    date = parseDate(regNumber, detailStr);
585
                    if(date == null){
586
                        // 2. try to parse as number
587
                        if(collectorsNumber.matcher(detailStr).matches()){
588
                            fieldNumber = detailStr;
589
                        }
590
                    }
591
                }
592
                if(date == null && fieldNumber == null){
593
                    // detailed parsing not possible, so need fo fallback
594
                    collectorStr = collectorData;
595
                }
596
            }
597

  
598
            if(collectorStr == null) {
599
                collectorStr = collectorData;
600
            }
601

  
602
            fieldUnit = FieldUnit.NewInstance();
603
            GatheringEvent ge = GatheringEvent.NewInstance();
604
            if(locality != null){
605
                ge.setLocality(LanguageString.NewInstance(locality, Language.UNKNOWN_LANGUAGE()));
606
            }
607

  
608
            TeamOrPersonBase agent =  state.getAgentBase(collectorStr);
609
            if(agent == null) {
610
                agent = Person.NewTitledInstance(collectorStr);
611
                getAgentService().save(agent);
612
                state.putAgentBase(collectorStr, agent);
613
            }
614
            ge.setCollector(agent);
615

  
616
            if(date != null){
617
                ge.setGatheringDate(date);
618
            }
619

  
620
            getEventBaseService().save(ge);
621
            fieldUnit.setGatheringEvent(ge);
622

  
623
            if(fieldNumber != null) {
624
                fieldUnit.setFieldNumber(fieldNumber);
625
            }
626
            getOccurrenceService().save(fieldUnit);
627

  
628
        }
629

  
630
        return fieldUnit;
631
    }
632

  
633
    protected Partial parseDate(String regNumber, String dateStr) {
634

  
635
        Partial pupDate = null;
636
        boolean parseError = false;
637

  
638
        String day = null;
639
        String month = null;
640
        String monthName = null;
641
        String year = null;
642

  
643
        for(Pattern p : datePatterns){
644
            Matcher m2 = p.matcher(dateStr);
645
            if(m2.matches()){
646
                try {
647
                    year = m2.group("year");
648
                } catch (IllegalArgumentException e){
649
                    // named capture group not found
650
                }
651
                try {
652
                    month = m2.group("month");
653
                } catch (IllegalArgumentException e){
654
                    // named capture group not found
655
                }
656

  
657
                try {
658
                    monthName = m2.group("monthName");
659
                    month = monthFromName(monthName, regNumber);
660
                    if(month == null){
661
                        parseError = true;
662
                    }
663
                } catch (IllegalArgumentException e){
664
                    // named capture group not found
665
                }
666
                try {
667
                    day = m2.group("day");
668
                } catch (IllegalArgumentException e){
669
                    // named capture group not found
670
                }
671

  
672
                if(year != null){
673
                    if (year.length() == 2) {
674
                        // it is an abbreviated year from the 19** years
675
                        year = "19" + year;
676
                    }
677
                    break;
678
                } else {
679
                    parseError = true;
680
                }
681
            }
682
        }
683
        if(year == null){
684
            parseError = true;
685
        }
686
        List<DateTimeFieldType> types = new ArrayList<>();
687
        List<Integer> values = new ArrayList<>();
688
        if(!parseError) {
689
            types.add(DateTimeFieldType.year());
690
            values.add(Integer.parseInt(year));
691
            if (month != null) {
692
                types.add(DateTimeFieldType.monthOfYear());
693
                values.add(Integer.parseInt(month));
694
            }
695
            if (day != null) {
696
                types.add(DateTimeFieldType.dayOfMonth());
697
                values.add(Integer.parseInt(day));
698
            }
699
            pupDate = new Partial(types.toArray(new DateTimeFieldType[types.size()]), ArrayUtils.toPrimitive(values.toArray(new Integer[values.size()])));
700
        }
701
        return pupDate;
702
    }
703

  
704
    private String monthFromName(String monthName, String regNumber) {
705

  
706
        Integer month = monthFromNameMap.get(monthName.toLowerCase());
707
        if(month == null){
708
            logger.warn(csvReportLine(regNumber, "Unknown month name", monthName));
709
            return null;
710
        } else {
711
            return month.toString();
712
        }
713
    }
714

  
715

  
716
    private void addSpecimenTypes(IBotanicalName taxonName, FieldUnit fieldUnit, String typeStr, TypesName typeName, boolean multiple, String regNumber, SpecimenOrObservationType specimenType){
717

  
718
        if(StringUtils.isEmpty(typeStr)){
719
            return;
720
        }
721
        typeStr = typeStr.trim().replaceAll("\\.$", "");
722

  
723
        Collection collection = null;
724
        DerivedUnit specimen = null;
725

  
726
        List<DerivedUnit> specimens = new ArrayList<>();
727
        if(multiple){
728
            String[] tokens = typeStr.split("\\s?,\\s?");
729
            for (String t : tokens) {
730
                // command to  list all complex parsabel types:
731
                // csvcut -t -c RegistrationNo_Pk,Type iapt.csv | csvgrep -c Type -m "Holotype" | egrep -o 'Holotype:\s([A-Z]*\s)[^.]*?'
732
                // csvcut -t -c RegistrationNo_Pk,Type iapt.csv | csvgrep -c Type -m "Holotype" | egrep -o 'Isotype[^:]*:\s([A-Z]*\s)[^.]*?'
733

  
734
                if(!t.isEmpty()){
735
                    // trying to parse the string
736
                    specimen = parseSpecimenType(fieldUnit, typeName, collection, t, regNumber);
737
                    if(specimen != null){
738
                        specimens.add(specimen);
739
                    } else {
740
                        // parsing was not successful make simple specimen
741
                        specimens.add(makeSpecimenType(fieldUnit, t, specimenType));
742
                    }
743
                }
744
            }
745
        } else {
746
            specimen = parseSpecimenType(fieldUnit, typeName, collection, typeStr, regNumber);
747
            if(specimen != null) {
748
                specimens.add(specimen);
749
                // remember current collection
750
                collection = specimen.getCollection();
751
            } else {
752
                // parsing was not successful make simple specimen
753
                specimens.add(makeSpecimenType(fieldUnit, typeStr, SpecimenOrObservationType.PreservedSpecimen));
754
            }
755
        }
756

  
757
        for(DerivedUnit s : specimens){
758
            taxonName.addSpecimenTypeDesignation(s, typeName.status(), null, null, null, false, true);
759
       }
760
    }
761

  
762
    private DerivedUnit makeSpecimenType(FieldUnit fieldUnit, String titleCache, SpecimenOrObservationType specimenType) {
763
        DerivedUnit specimen;DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(specimenType, fieldUnit);
764
        facade.setTitleCache(titleCache.trim(), true);
765
        specimen = facade.innerDerivedUnit();
766
        return specimen;
767
    }
768

  
769
    /**
770
     *
771
     * @param fieldUnit
772
     * @param typeName
773
     * @param collection
774
     * @param text
775
     * @param regNumber
776
     * @return
777
     */
778
    protected DerivedUnit parseSpecimenType(FieldUnit fieldUnit, TypesName typeName, Collection collection, String text, String regNumber) {
779

  
780
        DerivedUnit specimen = null;
781

  
782
        String collectionCode = null;
783
        String collectionTitle = null;
784
        String subCollectionStr = null;
785
        String instituteStr = null;
786
        String accessionNumber = null;
787

  
788
        boolean unusualAccessionNumber = false;
789

  
790
        text = text.trim();
791

  
792
        // 1.  For Isotypes often the accession number is noted alone if the
793
        //     preceeding entry has a collection code.
794
        if(typeName .equals(TypesName.isotype) && collection != null){
795
            Matcher m = accessionNumberOnlyPattern.matcher(text);
796
            if(m.matches()){
797
                try {
798
                    accessionNumber = m.group("accNumber");
799
                    specimen = makeSpecimenType(fieldUnit, collection, accessionNumber);
800
                } catch (IllegalArgumentException e){
801
                    // match group acc_number not found
802
                }
803
            }
804
        }
805

  
806
        //2. try it the 'normal' way
807
        if(specimen == null) {
808
            for (Pattern p : specimenTypePatterns) {
809
                Matcher m = p.matcher(text);
810
                if (m.matches()) {
811
                    // collection code or collectionTitle is mandatory
812
                    try {
813
                        collectionCode = m.group("colCode");
814
                    } catch (IllegalArgumentException e){
815
                        // match group colCode not found
816
                    }
817

  
818
                    try {
819
                        instituteStr = m.group("institute");
820
                    } catch (IllegalArgumentException e){
821
                        // match group col_name not found
822
                    }
823

  
824
                    try {
825
                        subCollectionStr = m.group("subCollection");
826
                    } catch (IllegalArgumentException e){
827
                        // match group subCollection not found
828
                    }
829
                    try {
830
                        accessionNumber = m.group("accNumber");
831

  
832
                        // try to improve the accessionNumber
833
                        if(accessionNumber!= null) {
834
                            accessionNumber = accessionNumber.trim();
835
                            Matcher m2 = accessionNumberOnlyPattern.matcher(accessionNumber);
836
                            String betterAccessionNumber = null;
837
                            if (m2.matches()) {
838
                                try {
839
                                    betterAccessionNumber = m.group("accNumber");
840
                                } catch (IllegalArgumentException e) {
841
                                    // match group acc_number not found
842
                                }
843
                            }
844
                            if (betterAccessionNumber != null) {
845
                                accessionNumber = betterAccessionNumber;
846
                            } else {
847
                                unusualAccessionNumber = true;
848
                            }
849
                        }
850

  
851
                    } catch (IllegalArgumentException e){
852
                        // match group acc_number not found
853
                    }
854

  
855
                    if(collectionCode == null && instituteStr == null){
856
                        logger.warn(csvReportLine(regNumber, "Type: neither 'collectionCode' nor 'institute' found in ", text));
857
                        continue;
858
                    }
859
                    collection = getCollection(collectionCode, instituteStr, subCollectionStr);
860
                    specimen = makeSpecimenType(fieldUnit, collection, accessionNumber);
861
                    break;
862
                }
863
            }
864
        }
865
        if(specimen == null) {
866
            logger.warn(csvReportLine(regNumber, "Type: Could not parse specimen", typeName.name().toString(), text));
867
        }
868
        if(unusualAccessionNumber){
869
            logger.warn(csvReportLine(regNumber, "Type: Unusual accession number", typeName.name().toString(), text, accessionNumber));
870
        }
871
        return specimen;
872
    }
873

  
874
    private DerivedUnit makeSpecimenType(FieldUnit fieldUnit, Collection collection, String accessionNumber) {
875

  
876
        DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.PreservedSpecimen, fieldUnit);
877
        facade.setCollection(collection);
878
        if(accessionNumber != null){
879
            facade.setAccessionNumber(accessionNumber);
880
        }
881
        return facade.innerDerivedUnit();
882
    }
883

  
884
    private TaxonName makeBotanicalName(SimpleExcelTaxonImportState<CONFIG> state, String regNumber, String titleCacheStr, String nameStr,
885
                                            String authorStr, String nomRefTitle) {
886

  
887
        TaxonName taxonName;// cache field for the taxonName.titleCache
888
        String taxonNameTitleCache = null;
889
        Map<String, AnnotationType> nameAnnotations = new HashMap<>();
890

  
891
        // TitleCache preprocessing
892
        if(titleCacheStr.endsWith(ANNOTATION_MARKER_STRING) || (authorStr != null && authorStr.endsWith(ANNOTATION_MARKER_STRING))){
893
            nameAnnotations.put("Author abbreviation not checked.", AnnotationType.EDITORIAL());
894
            titleCacheStr = titleCacheStr.replace(ANNOTATION_MARKER_STRING, "").trim();
895
            if(authorStr != null) {
896
                authorStr = authorStr.replace(ANNOTATION_MARKER_STRING, "").trim();
897
            }
898
        }
899

  
900
        // parse the full taxon name
901
        if(!StringUtils.isEmpty(nomRefTitle)){
902
            String referenceSeparator = nomRefTitle.startsWith("in ") ? " " : ", ";
903
            String taxonFullNameStr = titleCacheStr + referenceSeparator + nomRefTitle;
904
            logger.debug(":::::" + taxonFullNameStr);
905
            taxonName = nameParser.parseReferencedName(taxonFullNameStr, NomenclaturalCode.ICNAFP, null);
906
        } else {
907
            taxonName = (TaxonName) nameParser.parseFullName(titleCacheStr, NomenclaturalCode.ICNAFP, null);
908
        }
909

  
910
        taxonNameTitleCache = taxonName.getTitleCache().trim();
911
        if (taxonName.isProtectedTitleCache()) {
912
            logger.warn(csvReportLine(regNumber, "Name could not be parsed", titleCacheStr));
913
        } else {
914

  
915
            boolean doRestoreTitleCacheStr = false;
916

  
917
            // Check if titleCache and nameCache are plausible
918
            String titleCacheCompareStr = titleCacheStr;
919
            String nameCache = taxonName.getNameCache();
920
            String nameCompareStr = nameStr;
921
            if(taxonName.isBinomHybrid()){
922
                titleCacheCompareStr = titleCacheCompareStr.replace(" x ", " ×");
923
                nameCompareStr = nameCompareStr.replace(" x ", " ×");
924
            }
925
            if(taxonName.isMonomHybrid()){
926
                titleCacheCompareStr = titleCacheCompareStr.replaceAll("^X ", "× ");
927
                nameCompareStr = nameCompareStr.replace("^X ", "× ");
928
            }
929
            if(authorStr != null && authorStr.contains(" et ")){
930
                titleCacheCompareStr = titleCacheCompareStr.replaceAll(" et ", " & ");
931
            }
932
            if (!taxonNameTitleCache.equals(titleCacheCompareStr)) {
933
                logger.warn(csvReportLine(regNumber, "The generated titleCache differs from the imported string", taxonNameTitleCache, " != ", titleCacheStr, " ==> original titleCacheStr has been restored"));
934
                doRestoreTitleCacheStr = true;
935
            }
936
            if (!nameCache.trim().equals(nameCompareStr)) {
937
                logger.warn(csvReportLine(regNumber, "The parsed nameCache differs from field '" + NAMESTRING + "'", nameCache, " != ", nameCompareStr));
938
            }
939

  
940
            //  Author
941
            //nameParser.handleAuthors(taxonName, titleCacheStr, authorStr);
942
            //if (!titleCacheStr.equals(taxonName.getTitleCache())) {
943
            //    logger.warn(regNumber + ": titleCache has changed after setting authors, will restore original titleCacheStr");
944
            //    doRestoreTitleCacheStr = true;
945
            //}
946

  
947
            if(doRestoreTitleCacheStr){
948
                taxonName.setTitleCache(titleCacheStr, true);
949
            }
950

  
951
            // deduplicate
952
            replaceAuthorNamesAndNomRef(state, taxonName);
953
        }
954

  
955
        // Annotations
956
        if(!nameAnnotations.isEmpty()){
957
            for(String text : nameAnnotations.keySet()){
958
                taxonName.addAnnotation(Annotation.NewInstance(text, nameAnnotations.get(text), Language.DEFAULT()));
959
            }
960
        }
961

  
962
        taxonName.addSource(OriginalSourceType.Import, regNumber, null, state.getConfig().getSourceReference(), null);
963

  
964
        getNameService().save(taxonName);
965

  
966
        return taxonName;
967
    }
968

  
969
    /**
970
     * @param state
971
     * @return
972
     */
973
    private TaxonNode getClassificationRootNode(IAPTImportState state) {
974

  
975
     //   Classification classification = state.getClassification();
976
     //   if (classification == null){
977
     //       IAPTImportConfigurator config = state.getConfig();
978
     //       classification = Classification.NewInstance(state.getConfig().getClassificationName());
979
     //       classification.setUuid(config.getClassificationUuid());
980
     //       classification.setReference(config.getSecReference());
981
     //       classification = getClassificationService().find(state.getConfig().getClassificationUuid());
982
     //   }
983
        TaxonNode rootNode = state.getRootNode();
984
        if (rootNode == null){
985
            rootNode = getTaxonNodeService().find(ROOT_UUID);
986
        }
987
        if (rootNode == null){
988
            Classification classification = state.getClassification();
989
            if (classification == null){
990
                Reference sec = state.getSecReference();
991
                String classificationName = state.getConfig().getClassificationName();
992
                Language language = Language.DEFAULT();
993
                classification = Classification.NewInstance(classificationName, sec, language);
994
                state.setClassification(classification);
995
                classification.setUuid(state.getConfig().getClassificationUuid());
996
                classification.getRootNode().setUuid(ROOT_UUID);
997
                getClassificationService().save(classification);
998
            }
999
            rootNode = classification.getRootNode();
1000
            state.setRootNode(rootNode);
1001
        }
1002
        return rootNode;
1003
    }
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff