Project

General

Profile

« Previous | Next » 

Revision fb3dec85

Added by Andreas Müller about 8 years ago

Latest changes for Cuba import

View differences:

app-import/src/main/java/eu/etaxonomy/cdm/app/common/CdmDestinations.java
267 267
		return makeDestination(dbType, cdmServer, cdmDB, -1, cdmUserName, null);
268 268
	}
269 269

  
270
	   public static ICdmDataSource cdm_cuba_production(){
271
	        DatabaseTypeEnum dbType = DatabaseTypeEnum.MySQL;
272
	        String cdmServer = "160.45.63.171";
273
	        String cdmDB = "cdm_production_flora_cuba";
274
	        String cdmUserName = "edit";
275
	        return makeDestination(dbType, cdmServer, cdmDB, -1, cdmUserName, null);
276
	    }
277

  
270 278
	public static ICdmDataSource cdm_cyprus_production_tunnel(){
271 279
		DatabaseTypeEnum dbType = DatabaseTypeEnum.MySQL;
272 280
		String cdmServer = "127.0.0.1";
app-import/src/main/java/eu/etaxonomy/cdm/app/cuba/CubaActivator.java
39 39
	//database validation status (create, update, validate ...)
40 40
	static DbSchemaValidation hbm2dll = DbSchemaValidation.CREATE;
41 41

  
42
	private static final URI source = monocots();
43
//    private static final URI source = cyperaceae();
44

  
45

  
46
//	static final ICdmDataSource cdmDestination = CdmDestinations.localH2();
47
	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_local_mysql_test();
42
	boolean invers = true;
43
	boolean include = !invers;
44
    boolean doAsteraceae = include;
45
    boolean doConvolvulaceae = include;
46
    boolean doCyperaceae = include;
47
    boolean doDicotA_C = include;
48
    boolean doDicotD_M = include;
49
    boolean doDicotN_Z = include;
50
    boolean doEuphorbiaceae = include;
51
    boolean doFabaceae = include;
52
    boolean doGymnospermae = include;
53
    boolean doLamVerbenaceae = include;
54
    boolean doMalpighiaceae = include;
55
    boolean doMelastomataceae = ! include;
56
    boolean doMonocots = include ;
57
    boolean doMyrtaceae = include;
58
    boolean doOrchidaceae = include;
59
    boolean doRubiaceae = include;
60
    boolean doUrticaceae = include;
61

  
62

  
63

  
64
    static final ICdmDataSource cdmDestination = CdmDestinations.localH2();
65
//	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_local_mysql_test();
48 66
//	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_cyprus_dev();
49
//	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_cyprus_production();
67
//	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_cuba_production();
50 68

  
51 69

  
52 70
	//feature tree uuid
......
54 72

  
55 73
	//classification
56 74
	static final UUID classificationUuid = UUID.fromString("5de394de-9c76-4b97-b04d-71be31c7f44b");
75
	private static final String classificationName = "Flora of Cuba";
57 76

  
58 77
	static final String sourceReferenceTitle = "Cuba import";
59 78

  
60 79
	//check - import
61 80
	static final CHECK check = CHECK.IMPORT_WITHOUT_CHECK;
62 81

  
63
	//taxa
82
	static boolean doVocabularies = (hbm2dll == DbSchemaValidation.CREATE);
64 83
	static final boolean doTaxa = true;
65 84
	static final boolean doDeduplicate = false;
66
	static final boolean doDistribution = false;
67 85

  
68 86

  
69 87
	private void doImport(ICdmDataSource cdmDestination){
70 88

  
89
	    URI source = monocots();  //just any
90

  
71 91
		//make Source
72 92
		CubaImportConfigurator config= CubaImportConfigurator.NewInstance(source, cdmDestination);
73 93
		config.setClassificationUuid(classificationUuid);
74
		config.setCheck(check);
94
        config.setClassificationName(classificationName);
95
        config.setCheck(check);
75 96
//		config.setDoDistribution(doDistribution);
76 97
		config.setDoTaxa(doTaxa);
77 98
		config.setDbSchemaValidation(hbm2dll);
78 99
		config.setSourceReferenceTitle(sourceReferenceTitle);
100
		config.setDoVocabularies(doVocabularies);
79 101

  
80
		CdmDefaultImport<CubaImportConfigurator> myImport = new CdmDefaultImport();
102
		CdmDefaultImport<CubaImportConfigurator> myImport = new CdmDefaultImport<CubaImportConfigurator>();
81 103

  
82 104

  
83 105
		//...
84
		if (true){
85
			System.out.println("Start import from ("+ source.toString() + ") ...");
86
			config.setSourceReference(getSourceReference(sourceReferenceTitle));
87
			myImport.invoke(config);
88
			if (doTaxa){
89
				FeatureTree tree = makeFeatureNodes(myImport.getCdmAppController().getTermService());
90
				myImport.getCdmAppController().getFeatureTreeService().saveOrUpdate(tree);
91
			}
92

  
93
			System.out.println("End import from ("+ source.toString() + ")...");
94
		}
106
        if (doAsteraceae){
107
            doSource(asteraceae(), config, myImport, doVocabularies);
108
        }
109
        if (doConvolvulaceae){
110
            doSource(convolvulaceae(), config, myImport, doVocabularies);
111
        }
112
        if (doCyperaceae){
113
            doSource(cyperaceae(), config, myImport, doVocabularies);
114
        }
115
        if (doDicotA_C){
116
            doSource(dicotA_C(), config, myImport, doVocabularies);
117
        }
118
        if (doDicotD_M){
119
            doSource(dicotD_M(), config, myImport, doVocabularies);
120
        }
121
        if (doDicotN_Z){
122
            doSource(dicotN_Z(), config, myImport, doVocabularies);
123
        }
124
        if (doEuphorbiaceae){
125
            doSource(euphorbiaceae(), config, myImport, doVocabularies);
126
        }
127
        if (doFabaceae){
128
            doSource(fabaceae(), config, myImport, doVocabularies);
129
        }
130
        if (doGymnospermae){
131
            doSource(gymnospermae(), config, myImport, doVocabularies);
132
        }
133
        if (doLamVerbenaceae){
134
            doSource(lamVerbenaceae(), config, myImport, doVocabularies);
135
        }
136
        if (doMalpighiaceae){
137
            doSource(malpighiaceae(), config, myImport, doVocabularies);
138
        }
139
        if (doMelastomataceae){
140
            doSource(melastomataceae(), config, myImport, doVocabularies);
141
        }
142
        if (doMonocots){
143
            doSource(monocots(), config, myImport, doVocabularies);
144
        }
145
        if (doMyrtaceae){
146
            doSource(myrtaceae(), config, myImport, doVocabularies);
147
        }
148
        if (doOrchidaceae){
149
            doSource(orchidaceae(), config, myImport, doVocabularies);
150
        }
151
        if (doRubiaceae){
152
            doSource(rubiaceae(), config, myImport, doVocabularies);
153
        }
154
        if (doUrticaceae){
155
            doSource(urticaceae(), config, myImport, doVocabularies);
156
        }
95 157

  
96 158

  
97 159

  
......
106 168
			logger.warn("Deduplicated " + count + " references.");
107 169
		}
108 170

  
171

  
172
		System.exit(0);
173

  
109 174
	}
110 175

  
176
    /**
177
     * @param source
178
     * @param config
179
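     * @param myImport
     * @param doVocabularies if true, the feature tree is built and saved after the import has been invoked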
180
     */
181
    private void doSource(URI source, CubaImportConfigurator config,
182
            CdmDefaultImport<CubaImportConfigurator> myImport, boolean doVocabularies) {
183
        config.setSource(source);
184
        System.out.println("Start import from ("+ source.toString() + ") ...");
185
        config.setSourceReference(getSourceReference(sourceReferenceTitle));
186
        config.setDoVocabularies(false);
187
        myImport.invoke(config);
188

  
189
        if (doVocabularies){
190
            FeatureTree tree = makeFeatureNodes(myImport.getCdmAppController().getTermService());
191
            myImport.getCdmAppController().getFeatureTreeService().saveOrUpdate(tree);
192
        }
193
        System.out.println("End import from ("+ source.toString() + ")...");
194
    }
195

  
111 196
	private Reference<?> getSourceReference(String string) {
112 197
		Reference<?> result = ReferenceFactory.newGeneric();
113 198
		result.setTitleCache(string, true);
......
143 228
	public static URI monocots() {
144 229
	    return URI.create("file:////BGBM-PESIHPC/Cuba/Monocot.xlsx");
145 230
	}
146

  
147 231
	//Cyperaceae
148 232
	public static URI cyperaceae() {
149 233
	    return URI.create("file:////BGBM-PESIHPC/Cuba/Cyper_Poaceae.xlsx");
150 234
	}
151

  
152

  
235
    //Fabaceae
236
    public static URI fabaceae() {
237
        return URI.create("file:////BGBM-PESIHPC/Cuba/Fabaceae.xlsx");
238
    }
239
    //Urticaceae
240
    public static URI urticaceae() {
241
        return URI.create("file:////BGBM-PESIHPC/Cuba/Urticaceae.xlsx");
242
    }
243
    //Asteraceae
244
    public static URI asteraceae() {
245
        return URI.create("file:////BGBM-PESIHPC/Cuba/Asteraceae.xlsx");
246
    }
247
    //Convolvulaceae
248
    public static URI convolvulaceae() {
249
        return URI.create("file:////BGBM-PESIHPC/Cuba/Convolvulaceae.xlsx");
250
    }
251
    //dicot A-C
252
    public static URI dicotA_C() {
253
        return URI.create("file:////BGBM-PESIHPC/Cuba/dicotA_C.xlsx");
254
    }
255
    //dicot D-M
256
    public static URI dicotD_M() {
257
        return URI.create("file:////BGBM-PESIHPC/Cuba/dicotD_M.xlsx");
258
    }
259
    //dicot N-Z
260
    public static URI dicotN_Z() {
261
        return URI.create("file:////BGBM-PESIHPC/Cuba/dicotN_Z.xlsx");
262
    }
263
    //Euphorbiaceae
264
    public static URI euphorbiaceae() {
265
        return URI.create("file:////BGBM-PESIHPC/Cuba/Euphorbiaceae.xlsx");
266
    }
267
    //Gymnospermae
268
    public static URI gymnospermae() {
269
        return URI.create("file:////BGBM-PESIHPC/Cuba/gymnospermae.xlsx");
270
    }
271
    //Lam.Verbenaceae
272
    public static URI lamVerbenaceae() {
273
        return URI.create("file:////BGBM-PESIHPC/Cuba/Lam_Verbenaceae.xlsx");
274
    }
275
    //Malpighiaceae
276
    public static URI malpighiaceae() {
277
        return URI.create("file:////BGBM-PESIHPC/Cuba/Malpighiaceae.xlsx");
278
    }
279
    //Melastomataceae
280
    public static URI melastomataceae() {
281
        return URI.create("file:////BGBM-PESIHPC/Cuba/Melastomataceae.xlsx");
282
    }
283
    //Myrtaceae
284
    public static URI myrtaceae() {
285
        return URI.create("file:////BGBM-PESIHPC/Cuba/Myrtaceae.xlsx");
286
    }
287
    //Orchidaceae
288
    public static URI orchidaceae() {
289
        return URI.create("file:////BGBM-PESIHPC/Cuba/Orchidaceae.xlsx");
290
    }
291
    //Rubiaceae
292
    public static URI rubiaceae() {
293
        return URI.create("file:////BGBM-PESIHPC/Cuba/Rubiaceae.xlsx");
294
    }
153 295

  
154 296
	/**
155 297
	 * @param args
app-import/src/main/java/eu/etaxonomy/cdm/io/cuba/CubaExcelImport.java
9 9

  
10 10
package eu.etaxonomy.cdm.io.cuba;
11 11

  
12
import java.util.ArrayList;
12 13
import java.util.Arrays;
13 14
import java.util.HashMap;
14 15
import java.util.List;
15 16
import java.util.Set;
16 17
import java.util.UUID;
18
import java.util.regex.Matcher;
19
import java.util.regex.Pattern;
17 20

  
18 21
import org.apache.commons.lang.StringUtils;
19 22
import org.apache.log4j.Logger;
......
21 24

  
22 25
import eu.etaxonomy.cdm.common.CdmUtils;
23 26
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
24
import eu.etaxonomy.cdm.io.cyprus.CyprusRow;
25 27
import eu.etaxonomy.cdm.io.excel.common.ExcelImporterBase;
28
import eu.etaxonomy.cdm.model.agent.Team;
29
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
26 30
import eu.etaxonomy.cdm.model.common.Annotation;
27 31
import eu.etaxonomy.cdm.model.common.AnnotationType;
28 32
import eu.etaxonomy.cdm.model.common.Language;
29 33
import eu.etaxonomy.cdm.model.description.Distribution;
30
import eu.etaxonomy.cdm.model.description.Feature;
31 34
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
32 35
import eu.etaxonomy.cdm.model.description.TaxonDescription;
33
import eu.etaxonomy.cdm.model.description.TextData;
34 36
import eu.etaxonomy.cdm.model.location.NamedArea;
35 37
import eu.etaxonomy.cdm.model.name.BotanicalName;
38
import eu.etaxonomy.cdm.model.name.NameRelationshipType;
36 39
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
37
import eu.etaxonomy.cdm.model.name.NonViralName;
40
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
41
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
38 42
import eu.etaxonomy.cdm.model.name.Rank;
39
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
40 43
import eu.etaxonomy.cdm.model.reference.Reference;
44
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
41 45
import eu.etaxonomy.cdm.model.taxon.Classification;
42
import eu.etaxonomy.cdm.model.taxon.Synonym;
46
import eu.etaxonomy.cdm.model.taxon.ITaxonTreeNode;
47
import eu.etaxonomy.cdm.model.taxon.SynonymRelationship;
43 48
import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
44 49
import eu.etaxonomy.cdm.model.taxon.Taxon;
45
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
46 50
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
47 51
import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
48 52
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
......
55 59
@Component
56 60
public class CubaExcelImport extends ExcelImporterBase<CubaImportState> {
57 61
    private static final long serialVersionUID = -747486709409732371L;
58

  
59 62
    private static final Logger logger = Logger.getLogger(CubaExcelImport.class);
60 63

  
64
    private static final String HOMONYM_MARKER = ".*\\s+homon.?$";
65
    private static final String DOUBTFUL_MARKER = "^\\?\\s?";
66

  
67

  
68
    private static UUID rootUuid = UUID.fromString("206d42e4-ac32-4f20-a093-14826014e667");
69
    private static UUID plantaeUuid = UUID.fromString("139e7314-dd19-4286-a01d-8cc94ef77a09");
70

  
61 71
    private static INonViralNameParser<?> nameParser = NonViralNameParserImpl.NewInstance();
62 72
    private static NomenclaturalCode nc = NomenclaturalCode.ICNAFP;
63 73

  
64 74
    private  static List<String> expectedKeys= Arrays.asList(new String[]{"Fam.","(Fam.)","Taxón","(Notas)","Syn.","End","Ind","Ind? D","Nat","Dud P","Adv","Cult C","CuW","PR PR*","Art","Hab(*)","May","Mat","IJ","CuC","VC","Ci","SS","CA","Cam","LT","CuE","Gr","Ho","SC","Gu","Esp","Ja","PR","Men","Bah","Cay","AmN","AmC","AmS","VM"});
65 75

  
66
    private  static List<String> dummy= Arrays.asList(new String[]{
67
            "(Fam.)","Syn.","Ind? D","Nat","Dud P","Adv","Cult C",
68
            "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
69
            "CuC","VC","Ci","SS","CA","Cam","LT",
70
            "CuE","Gr","Ho","SC","Gu",
71
            "Esp","Ja","PR","Men","Bah","Cay",
72
            "AmN","AmC","AmS","VM"});
73

  
74

  
75 76
	@Override
76 77
    protected void analyzeRecord(HashMap<String, String> record, CubaImportState state) {
77

  
78
		Set<String> keys = record.keySet();
79
        for (String key: keys) {
80
            if (! expectedKeys.contains(key)){
81
                logger.warn("Unexpected Key: " + key);
82
            }
83
        }
84

  
85
        //Fam.
86
        TaxonNode familyTaxon = getFamilyTaxon(record, state);
87
        if (familyTaxon == null){
88
            logger.warn("Family not recognized. Do not handle row");
89
            return;
90
        }
91

  
92
        //(Fam.)
93
        //TODO
94

  
95
        //Taxón
96
        Taxon taxon = makeTaxon(record, state, familyTaxon);
97

  
98
        //(Notas)
99
        makeNotes(record, state, taxon);
100

  
101
        //Syn.
102
        makeSynonyms(record, state, taxon);
103

  
104
        //End, Ind, Ind? D, Nat N, Dud P, Adv A, Cult C
105
        makeCubanDistribution(record, state, taxon);
106

  
78
	    //we do everything in firstPass here
107 79
    	return;
108 80
    }
109 81

  
......
113 85
     * @param state
114 86
     * @param taxon
115 87
     */
116
    private void makeCubanDistribution(HashMap<String, String> record, CubaImportState state, Taxon taxon) {
88
    private void makeCubanDistribution(HashMap<String, String> record, CubaImportState state) {
117 89
        try {
118 90
            NamedArea cuba = getNamedArea(state, state.getTransformer().getNamedAreaUuid("C"), null, null, null, null, null);
119
            TaxonDescription desc = getTaxonDescription(taxon, false, true);
120
            PresenceAbsenceTerm status =  makeCubanStatus(record, state);
121
            Distribution distribution = Distribution.NewInstance(cuba, status);
122
            desc.addElement(distribution);
91
            TaxonDescription desc = getTaxonDescription(state.getCurrentTaxon(), false, true);
92
            List<PresenceAbsenceTerm> statuss =  makeCubanStatus(record, state);
93
            for (PresenceAbsenceTerm status : statuss){
94
                Distribution distribution = Distribution.NewInstance(cuba, status);
95
                desc.addElement(distribution);
96
            }
123 97
        } catch (UndefinedTransformerMethodException e) {
124 98
            e.printStackTrace();
125 99
        }
......
132 106
     * @return
133 107
     * @throws UndefinedTransformerMethodException
134 108
     */
135
    private PresenceAbsenceTerm makeCubanStatus(HashMap<String, String> record, CubaImportState state) throws UndefinedTransformerMethodException {
109
    private List<PresenceAbsenceTerm> makeCubanStatus(HashMap<String, String> record, CubaImportState state) throws UndefinedTransformerMethodException {
136 110
        boolean isAbsent = false;  //TODO
137 111

  
112
        String line = state.getCurrentLine() + ": ";
113
        List<PresenceAbsenceTerm> result = new ArrayList<>();
114

  
138 115
        String endemicStr = getValue(record, "End");
139 116
        String indigenousStr = getValue(record, "Ind");
140 117
        String indigenousDoubtStr = getValue(record, "Ind? D");
......
144 121
        String cultStr = getValue(record, "Cult C");
145 122

  
146 123
        if (endemicStr != null){
147
            boolean allNull = checkAllNull(indigenousStr, indigenousDoubtStr, naturalisedStr, dudStr, advStr, cultStr);
148
            if (!endemicStr.equals("+")){
149
                logger.warn("Endemic not recognized: " + endemicStr);
150
                return null;
151
            }else if (! allNull){
152
                logger.warn("Cuban state is endemic but others exist");
153
                return null;
124
            if(endemicStr.equals("+")){
125
                PresenceAbsenceTerm endemicState = state.getTransformer().getPresenceTermByKey("E");
126
                result.add(endemicState);
127
            }else if(isMinus(endemicStr)){
128
                UUID endemicUuid = state.getTransformer().getPresenceTermUuid("-E");
129
                PresenceAbsenceTerm endemicState = getPresenceTerm(state, endemicUuid, null, null, null, false);
130
                result.add(endemicState);
154 131
            }else{
155
                return PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA();
132
                logger.warn(line + "Endemic not recognized: " + endemicStr);
156 133
            }
157
        }else if (indigenousStr != null){
158
            boolean allNull = checkAllNull(indigenousDoubtStr, naturalisedStr, dudStr, advStr, cultStr);
159
            if (!checkPlusMinus(indigenousStr)){
160
                logger.warn("Indigenous not recognized: " + indigenousStr);
161
                return null;
162
            }else if (! allNull){
163
                //TODO may this exist?
164
                logger.warn("Cuban state is indigenous but others exist");
165
                return null;
166
            }else if(indigenousStr.equals("+")){
134
        }
135
        if (indigenousStr != null){
136
            if(indigenousStr.equals("+")){
167 137
                UUID indigenousUuid = state.getTransformer().getPresenceTermUuid("Ind.");
168
                PresenceAbsenceTerm indigenousState = getPresenceTerm(state, indigenousUuid, null, null, null);
169
                return indigenousState;
170
            }else if(indigenousStr.equals("-") || indigenousStr.equals("–")){
171
                logger.warn("Indigenous status '-' not yet handled)");
172
                return PresenceAbsenceTerm.ABSENT();
138
                PresenceAbsenceTerm indigenousState = getPresenceTerm(state, indigenousUuid, null, null, null, false);
139
                result.add(indigenousState);
140
            }else if(isMinus(indigenousStr)){
141
                PresenceAbsenceTerm haturalizedState = state.getTransformer().getPresenceTermByKey("-Ind.");
142
                result.add(haturalizedState);
143
            }else if(indigenousStr.equals("?")){
144
                UUID indigenousDoubtUuid = state.getTransformer().getPresenceTermUuid("?Ind.");
145
                PresenceAbsenceTerm indigenousDoubtState = getPresenceTerm(state, indigenousDoubtUuid, null, null, null, false);
146
                result.add(indigenousDoubtState);
173 147
            }else{
174
                logger.warn("Indigenous not recognized: " + indigenousStr);
175
                return null;
148
                logger.warn(line + "Indigenous not recognized: " + indigenousStr);
149
            }
150
        }
151
        if(indigenousDoubtStr != null){
152
            if(indigenousDoubtStr.equals("D")){
153
                UUID indigenousDoubtUuid = state.getTransformer().getPresenceTermUuid("Ind.?");
154
                PresenceAbsenceTerm indigenousDoubtState = getPresenceTerm(state, indigenousDoubtUuid, null, null, null, false);
155
                result.add(indigenousDoubtState);
156
            }else{
157
                logger.warn(line + "Indigenous doubtful not recognized: " + indigenousDoubtStr);
158
            }
159
        }
160
        if(naturalisedStr != null){
161
            if(naturalisedStr.equals("N")){
162
                  PresenceAbsenceTerm haturalizedState = state.getTransformer().getPresenceTermByKey("Nat.");
163
                  result.add(haturalizedState);
164
            }else if(isMinus(naturalisedStr)){
165
                UUID naturalisedErrorUuid = state.getTransformer().getPresenceTermUuid("-Nat.");
166
                PresenceAbsenceTerm naturalisedErrorState = getPresenceTerm(state, naturalisedErrorUuid, null, null, null, false);
167
                result.add(naturalisedErrorState);
168
            }else if(naturalisedStr.equals("?")){
169
                UUID naturalisedDoubtUuid = state.getTransformer().getPresenceTermUuid("?Nat.");
170
                PresenceAbsenceTerm naturalisedDoubtState = getPresenceTerm(state, naturalisedDoubtUuid, null, null, null, false);
171
                result.add(naturalisedDoubtState);
172
            }else{
173
                logger.warn(line + "Naturalized not recognized: " + naturalisedStr);
174
            }
175
        }
176
        if(dudStr != null){
177
            if(dudStr.equals("P")){
178
                UUID dudUuid = state.getTransformer().getPresenceTermUuid("Dud.");
179
                PresenceAbsenceTerm dudState = getPresenceTerm(state, dudUuid, null, null, null, false);
180
                result.add(dudState);
181
            }else if(isMinus(dudStr)){
182
                UUID nonNativeErrorUuid = state.getTransformer().getPresenceTermUuid("-Dud.");
183
                PresenceAbsenceTerm nonNativeErrorState = getPresenceTerm(state, nonNativeErrorUuid, null, null, null, false);
184
                result.add(nonNativeErrorState);
185
            }else if(dudStr.equals("?")){
186
                UUID naturalisedDoubtUuid = state.getTransformer().getPresenceTermUuid("?Dud.");
187
                PresenceAbsenceTerm naturalisedDoubtState = getPresenceTerm(state, naturalisedDoubtUuid, null, null, null, false);
188
                result.add(naturalisedDoubtState);
189
            }else{
190
                logger.warn(line + "non-native and doubtfully naturalised not recognized: " + dudStr);
191
            }
192
        }
193
        if(advStr != null){
194
            if(advStr.equals("A")){
195
                UUID advUuid = state.getTransformer().getPresenceTermUuid("Adv.");
196
                PresenceAbsenceTerm advState = getPresenceTerm(state, advUuid, null, null, null, false);
197
                result.add(advState);
198
            }else if(isMinus(advStr)){
199
                UUID advUuid = state.getTransformer().getPresenceTermUuid("-Adv.");
200
                PresenceAbsenceTerm advState = getPresenceTerm(state, advUuid, null, null, null, false);
201
                result.add(advState);
202
            }else{
203
                logger.warn(line + "'adventive (casual) alien' not recognized: " + advStr);
204
            }
205
        }else if(cultStr != null){
206
            if (! (cultStr.matches("(C|\\(C\\)|\\?|–)"))){
207
                logger.warn("'cultivated' not recognized: " + cultStr);
208
            }else if(cultStr.equals("C")){
209
                PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("Cult.");
210
                result.add(cultivatedState);
211
            }else if(cultStr.equals("?")){
212
                PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("?Cult.");
213
                result.add(cultivatedState);
214
            }else if(cultStr.equals("(C)")){
215
                UUID ocassualCultUuid = state.getTransformer().getPresenceTermUuid("(C)");
216
                PresenceAbsenceTerm cultivatedState = getPresenceTerm(state, ocassualCultUuid, null, null, null, false);
217
                result.add(cultivatedState);
218
            }else if(isMinus(cultStr)){
219
                PresenceAbsenceTerm cultivatedState = state.getTransformer().getPresenceTermByKey("-Cult.");
220
                result.add(cultivatedState);
221
            }else{
222
                logger.warn(line + "'cultivated' not recognized: " + cultStr);
176 223
            }
177
        }else if(indigenousDoubtStr != null){
178

  
179 224
        }
180 225

  
181
        return null;
226
        return result;
227
    }
228

  
229

  
230
    /**
231
     * @param str the cell value to test for a minus sign ("-" or "–")
232
     * @return
233
     */
234
    private boolean isMinus(String str) {
235
        return str.equals("-") || str.equals("–");
182 236
    }
183 237

  
184 238

  
......
186 240
     * @param str the cell value to check
187 241
     * @return
188 242
     */
189
    private boolean checkPlusMinus(String str) {
190
        return str.equals("+") || str.equals("-") || str.equals("–");
243
    private boolean checkPlusMinusDoubt(String str) {
244
        return str.equals("+") || isMinus(str)|| str.equals("?");
191 245
    }
192 246

  
193 247

  
......
209 263
    }
210 264

  
211 265

  
266
    private static final String acceptedRegExStr = "\\(([^\\[\\]“”]{6,})\\)";
267
//    String heterotypicRegExStr2 = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})" +
268
//                    + "(\\((.{6,})\\))?";
269
    private static final String heterotypicRegExStr = "([^\\(\\[\\]“”]{5,})"
270
                                                     +"(\\((.{6,})\\))?";
271
    private static final String heterotypicRegExStr_TEST = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})"
272
            +"(\\((.{6,})\\))?";
273
    private static final String missapliedRegExStr = "“(.*{5,})”\\s+(auct\\.(\\sFC\\-S)?(\\s+p\\.\\s*p\\.)?|sensu\\s+.{2,})";
274
    private static final String nomInvalRegExStr = "“(.*{5,})”\\s+nom\\.\\s+inval\\.";
275
    private static final String homonymRegExStr = "\\s*(\\[.*\\])*\\s*";
276

  
277
    private static final Pattern acceptedRegEx = Pattern.compile(acceptedRegExStr + homonymRegExStr);
278
    private static final Pattern heterotypicRegEx = Pattern.compile(heterotypicRegExStr + homonymRegExStr);
279
    private static final Pattern missapliedRegEx = Pattern.compile(missapliedRegExStr);
280
    private static final Pattern nomInvalRegEx = Pattern.compile(nomInvalRegExStr);
281

  
212 282
    /**
213 283
     * @param record
214 284
     * @param state
215 285
     * @param taxon
216 286
     */
217
    private void makeSynonyms(HashMap<String, String> record, CubaImportState state, Taxon taxon) {
218
        // TODO Auto-generated method stub
287
    private void makeSynonyms(HashMap<String, String> record, CubaImportState state) {
288
//        boolean forAccepted = true;
289
        String synonymStr = record.get("Syn.");
290
        String line = state.getCurrentLine() + ": ";
291

  
292
        if (synonymStr == null){
293
            //TODO test that this is not a synonym only line
294
            return;
295
        }
296
        synonymStr = synonymStr.trim();
297

  
298
//        String heterotypicRegExStr = "([^\\(]{5,}(\\(.+\\))?[^\\)\\(]{2,})(\\((.{6,})\\))?";
299
//        String heterotypicRegExStr = "([^\\(]{5,})(\\((.{6,})\\))?";
300

  
301
//        Pattern heterotypicRegEx = Pattern.compile(heterotypicRegExStr + homonymRegExStr);
302

  
303
        Matcher missapliedMatcher = missapliedRegEx.matcher(synonymStr);
304
        Matcher nomInvalMatcher = nomInvalRegEx.matcher(synonymStr);
305
        Matcher acceptedMatcher = acceptedRegEx.matcher(synonymStr);
306
        Matcher heterotypicMatcher = heterotypicRegEx.matcher(synonymStr);
307

  
308
        List<BotanicalName> homonyms = new ArrayList<>();
309
        if (missapliedMatcher.matches()){
310
            String firstPart = missapliedMatcher.group(1);
311
            BotanicalName name = (BotanicalName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
312

  
313
            String secondPart = missapliedMatcher.group(2);
314
            Taxon misappliedNameTaxon = Taxon.NewInstance(name, null);
315
            if (secondPart.startsWith("sensu")){
316
                secondPart = secondPart.substring(5).trim();
317
                if (secondPart.contains(" ")){
318
                    logger.warn(line + "Second part contains more than 1 word. Check if this is correct: " + secondPart);
319
                }
320
                Reference<?> sensu = ReferenceFactory.newGeneric();
321
                Team team = Team.NewTitledInstance(secondPart, null);
322
                sensu.setAuthorship(team);
323
                misappliedNameTaxon.setSec(sensu);
324
            }else if (secondPart.matches("auct.((\\s+p\\.\\s*p\\.)|(\\sFC\\-S))?")){
325
                secondPart = secondPart.replace("p. p.", "p.p.");
326
                misappliedNameTaxon.setAppendedPhrase(secondPart);
327
            }else{
328
                logger.warn(line + "Misapplied second part not recognized: " + secondPart);
329
            }
330
            //TODO
331
            Reference<?> relRef = null;
332
            state.getCurrentTaxon().addMisappliedName(misappliedNameTaxon, relRef, null);
333
        }else if (nomInvalMatcher.matches()){
334
            String firstPart = nomInvalMatcher.group(1);
335
            BotanicalName name = (BotanicalName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
336
            NomenclaturalStatus status = NomenclaturalStatus.NewInstance( NomenclaturalStatusType.INVALID());
337
            name.addStatus(status);
338
            state.getCurrentTaxon().addSynonymName(name, SynonymRelationshipType.SYNONYM_OF());
339
        }else if (acceptedMatcher.matches()){
340
            String firstPart = acceptedMatcher.group(1);
341
            String homonymPart = acceptedMatcher.groupCount() < 2 ? null : acceptedMatcher.group(2);
342
            handleHomotypicGroup(firstPart, state, (BotanicalName)state.getCurrentTaxon().getName(), false, homonyms, homonymPart, false);
343
        }else if(heterotypicMatcher.matches()){
344
            String firstPart = heterotypicMatcher.group(1).trim();
345
            String secondPart = heterotypicMatcher.groupCount() < 3 ? null : heterotypicMatcher.group(3);
346
            String homonymPart = heterotypicMatcher.groupCount() < 4 ? null : heterotypicMatcher.group(4);
347
            boolean isDoubtful = firstPart.matches("^\\?\\s*.*");
348
            boolean isHomonym = firstPart.trim().matches(HOMONYM_MARKER);
349
            firstPart = normalizeStatus(firstPart);
350
            BotanicalName synName = (BotanicalName)nameParser.parseReferencedName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
351
            if (synName.isProtectedTitleCache()){
352
                logger.warn(line + "heterotypic base synonym could not be parsed correctly:" + firstPart);
353
            }
354
            if (isHomonym){
355
                homonyms.add(synName);
356
            }
357
            SynonymRelationship sr = state.getCurrentTaxon().addHeterotypicSynonymName(synName);
358
            sr.getSynonym().setDoubtful(isDoubtful);
359
            handleHomotypicGroup(secondPart, state, synName, true, homonyms, homonymPart, isDoubtful);
360
        }else{
361
            logger.warn(line + "Synonym entry does not match: " + synonymStr);
362
        }
363
    }
364

  
365

  
366

  
367
    /**
368
     * @param synonymStr
369
     * @param state
370
     * @param homonyms
371
     * @param homonymPart
372
     * @param isDoubtful
373
     * @param taxon
374
     * @param homotypicalGroup
375
     */
376
    private void handleHomotypicGroup(String homotypicStr,
377
            CubaImportState state,
378
            BotanicalName homotypicName,
379
            boolean isHeterotypic,
380
            List<BotanicalName> homonyms,
381
            String homonymPart,
382
            boolean isDoubtful) {
383

  
384
        if (homotypicStr == null){
385
            return;
386
        }else if (homotypicStr.startsWith("(") && homotypicStr.endsWith("")){
387
            homotypicStr = homotypicStr.substring(1, homotypicStr.length() - 1);
388
        }
389

  
390
        BotanicalName currentBasionym = homotypicName;
391
        String[] splits = homotypicStr.split("\\s*,\\s*");
392
        for (String split : splits){
393
            boolean isHomonym = split.trim().matches(HOMONYM_MARKER);
394
            String singleName = normalizeStatus(split);
395
            BotanicalName newName = (BotanicalName)nameParser.parseReferencedName(singleName, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
396
            if (newName.isProtectedTitleCache()){
397
                logger.warn(state.getCurrentLine() + ": homotypic name part could not be parsed: " + split);
398
            }
399
            if (isHomonym){
400
                homonyms.add(newName);
401
            }
402
            if (isHeterotypic){
403
                SynonymRelationship sr = state.getCurrentTaxon().addHeterotypicSynonymName(newName, homotypicName.getHomotypicalGroup(), null, null);
404
                sr.getSynonym().setDoubtful(isDoubtful);
405
//                newName.addBasionym(homotypicName);
406
                currentBasionym = handleBasionym(currentBasionym, newName);
407
            }else{
408
                state.getCurrentTaxon().addHomotypicSynonymName(newName, null, null);
409
                handleBasionym(currentBasionym, newName);
410
            }
411
        }
412
        makeHomonyms(homonyms, homonymPart, state);
413
    }
414

  
415

  
416
    /**
417
     * @param homonyms
418
     * @param homonymPart
419
     * @param state
420
     */
421
    private void makeHomonyms(List<BotanicalName> homonyms, String homonymPart, CubaImportState state) {
422
        String line = state.getCurrentLine() + ": ";
423
        homonymPart = homonymPart == null ? "" : homonymPart.trim();
424
        if (homonyms.isEmpty() && homonymPart.equals("")){
425
            return;
426
        }else if (homonymPart.equals("")){
427
            logger.warn(line + "SynonymPart has homonyms but homonymPart is empty");
428
            return;
429
        }
430
        homonymPart = homonymPart.substring(1, homonymPart.length() - 1);
431
        String[] splits = homonymPart.split("\\]\\s*\\[");
432
        if (splits.length != homonyms.size()){
433
            logger.warn(line + "Number of homonyms (" + homonyms.size() + ") and homonymParts ("+splits.length+") does not match");
434
            return;
435
        }
436
        int i = 0;
437
        for (String split : splits){
438
            split = split.replaceAll("^non\\s+", "");
439
            BotanicalName newName = (BotanicalName)nameParser.parseReferencedName(split, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
440
            if (newName.isProtectedTitleCache()){
441
                logger.warn(state.getCurrentLine() + ": homonym name could not be parsed: " + split);
442
            }
443
            newName.addRelationshipToName(homonyms.get(i), NameRelationshipType.LATER_HOMONYM(), null);
444
            i++;
445
        }
446
    }
447

  
448

  
449
    /**
450
     * @param newName
451
     * @param homotypicName
452
     * @return
453
     */
454
    private BotanicalName handleBasionym(BotanicalName currentBasionym, BotanicalName name2) {
455
        BotanicalName basionymName = currentBasionym;
456
        BotanicalName newCombination = name2;
457
        //switch if necessary
458
        if (basionymName.getBasionymAuthorship() != null && newCombination.getBasionymAuthorship() == null){
459
            basionymName = name2;
460
            newCombination = currentBasionym;
461
        }
462
        if (matchAuthor(basionymName.getCombinationAuthorship(), newCombination.getBasionymAuthorship())){
463
            newCombination.getHomotypicalGroup().setGroupBasionym(basionymName);
464
        }
465
        return basionymName;
466
    }
467

  
219 468

  
469
    /**
470
     * @param combinationAuthorship
471
     * @param basi
472
     * @return
473
     */
474
    private boolean matchAuthor(TeamOrPersonBase<?> author1, TeamOrPersonBase<?> author2) {
475
        if (author1 == null || author2 == null){
476
            return false;
477
        }else {
478
            return author1.getNomenclaturalTitle().equals(author2.getNomenclaturalTitle());
479
        }
220 480
    }
221 481

  
222 482

  
......
225 485
     * @param state
226 486
     * @param taxon
227 487
     */
228
    private void makeNotes(HashMap<String, String> record, CubaImportState state, Taxon taxon) {
488
    private void makeNotes(HashMap<String, String> record, CubaImportState state) {
229 489
        String notesStr = getValue(record, "(Notas)");
230 490
        if (notesStr == null){
231 491
            return;
......
233 493
            Annotation annotation = Annotation.NewDefaultLanguageInstance(notesStr);
234 494
            //TODO
235 495
            annotation.setAnnotationType(AnnotationType.EDITORIAL());
236
            taxon.addAnnotation(annotation);
496
            state.getCurrentTaxon().addAnnotation(annotation);
237 497
        }
238 498
    }
239 499

  
......
244 504
     * @param familyTaxon
245 505
     * @return
246 506
     */
247
    private Taxon makeTaxon(HashMap<String, String> record, CubaImportState state, TaxonNode familyNode) {
507
    private Taxon makeTaxon(HashMap<String, String> record, CubaImportState state, TaxonNode familyNode, boolean isSynonym) {
248 508
        String taxonStr = getValue(record, "Taxón");
249 509
        if (taxonStr == null){
250
            return null;
510
            return isSynonym ? state.getCurrentTaxon() : null;
251 511
        }
252 512
        boolean isAbsent = false;
253 513
        if (taxonStr.startsWith("[") && taxonStr.endsWith("]")){
254 514
            taxonStr = taxonStr.substring(1, taxonStr.length() - 1);
255 515
            isAbsent = true;
256 516
        }
517
        taxonStr = normalizeStatus(taxonStr);
257 518

  
258
        TaxonNameBase<?,?> botanicalName = nameParser.parseFullName(taxonStr, nc, Rank.SPECIES());
519
        BotanicalName botanicalName = (BotanicalName)nameParser.parseReferencedName(taxonStr, nc, Rank.SPECIES());
520
        Reference<?> sec = getSecReference(state);
521
        Taxon taxon = Taxon.NewInstance(botanicalName, sec);
522
        TaxonNode higherNode;
259 523
        if (botanicalName.isProtectedTitleCache()){
260
            logger.warn("Taxon could not be parsed: " + taxonStr);
524
            logger.warn(state.getCurrentLine() + ": Taxon could not be parsed: " + taxonStr);
525
            higherNode = familyNode;
526
        }else{
527
            String genusStr = botanicalName.getGenusOrUninomial();
528
            Taxon genus = state.getHigherTaxon(genusStr);
529
            if (genus != null){
530
                higherNode = genus.getTaxonNodes().iterator().next();
531
            }else{
532
                BotanicalName name = BotanicalName.NewInstance(Rank.GENUS());
533
                name.setGenusOrUninomial(genusStr);
534
                genus = Taxon.NewInstance(name, sec);
535
                higherNode = familyNode.addChildTaxon(genus, null, null);
536
                state.putHigherTaxon(genusStr, genus);
537
            }
261 538
        }
262
        Reference<?> sec = null;
263
        Taxon taxon = Taxon.NewInstance(botanicalName, sec);
264
        familyNode.addChildTaxon(taxon, null, null);
539

  
540
        higherNode.addChildTaxon(taxon, null, null);
265 541

  
266 542
        return taxon;
267 543
    }
268 544

  
545
    /**
546
     * @param state
547
     * @return
548
     */
549
    private Reference<?> getSecReference(CubaImportState state) {
550
        Reference<?> result = state.getSecReference();
551
        if (result == null){
552
            result = ReferenceFactory.newDatabase();
553
            result.setTitle("Flora of Cuba");
554
            state.setSecReference(result);
555
        }
556
        return result;
557
    }
558

  
559

  
560
    private static final String[] nomStatusStrings = new String[]{"nom. cons.", "ined.", "nom. illeg.",
561
            "nom. rej.","nom. cons. prop.","nom. altern."};
562
    /**
563
     * @param taxonStr
564
     * @return
565
     */
566
    private String normalizeStatus(String taxonStr) {
567
        if (taxonStr == null){
568
            return null;
569
        }
570
        for (String nomStatusStr : nomStatusStrings){
571
            nomStatusStr = " " + nomStatusStr;
572
            if (taxonStr.endsWith(nomStatusStr)){
573
                taxonStr = taxonStr.replace(nomStatusStr, "," + nomStatusStr);
574
            }
575
        }
576
        taxonStr = taxonStr.replaceAll(HOMONYM_MARKER, "").trim();
577
        taxonStr = taxonStr.replaceAll(DOUBTFUL_MARKER, "").trim();
578
        return taxonStr;
579

  
580

  
581
    }
582

  
269 583

  
270 584
    /**
271 585
     * @param record
......
284 598
        }else{
285 599
            BotanicalName name = BotanicalName.NewInstance(Rank.FAMILY());
286 600
            name.setGenusOrUninomial(familyStr);
287
            Reference<?> sec = null;
601
            Reference<?> sec = getSecReference(state);
288 602
            Taxon taxon = Taxon.NewInstance(name, sec);
289
            Classification classification = getClassification(state);
290
            familyNode = classification.addChildTaxon(taxon, sec, null);
603
            ITaxonTreeNode rootNode = getClassification(state);
604
            familyNode = rootNode.addChildTaxon(taxon, sec, null);
605
            state.putHigherTaxon(familyStr, taxon);
291 606
        }
292 607

  
293 608
        return familyNode;
......
298 613
     * @param state
299 614
     * @return
300 615
     */
301
    private Classification getClassification(CubaImportState state) {
616
    private TaxonNode getClassification(CubaImportState state) {
302 617
        Classification classification = state.getClassification();
303 618
        if (classification == null){
304
            String name = state.getConfig().getClassificationName();
305
            //TODO
306
            Reference<?> sec = null;
307
            Language language = Language.DEFAULT();
308
            classification = Classification.NewInstance(name, sec, language);
309
            state.setClassification(classification);
619
            classification = getClassificationService().find(state.getConfig().getClassificationUuid());
620
        }
621
        TaxonNode rootNode = state.getRootNode();
622
        if (rootNode == null){
623
            rootNode = getTaxonNodeService().find(plantaeUuid);
624
        }
625
        if (rootNode == null){
626
            Reference<?> sec = getSecReference(state);
627
            if (classification == null){
628
                String classificationName = state.getConfig().getClassificationName();
629
                //TODO
630
                Language language = Language.DEFAULT();
631
                classification = Classification.NewInstance(classificationName, sec, language);
632
                state.setClassification(classification);
633
                classification.setUuid(state.getConfig().getClassificationUuid());
634
                classification.getRootNode().setUuid(rootUuid);
635
            }
636

  
637
            BotanicalName plantaeName = BotanicalName.NewInstance(Rank.KINGDOM());
638
            plantaeName.setGenusOrUninomial("Plantae");
639
            Taxon plantae = Taxon.NewInstance(plantaeName, sec);
640
            TaxonNode plantaeNode = classification.addChildTaxon(plantae, null, null);
641
            plantaeNode.setUuid(plantaeUuid);
642
            state.setRootNode(plantaeNode);
310 643
            getClassificationService().save(classification);
644

  
645
            rootNode = plantaeNode;
311 646
        }
312
        return classification;
647
        return rootNode;
313 648
    }
314 649

  
315 650

  
......
330 665
    }
331 666

  
332 667

  
333
	private Feature redBookCategory;
334
	private Feature endemism;
335

  
336

  
337 668

  
338 669
	/**
339 670
	 *  Stores taxa records in DB
340 671
	 */
341 672
	@Override
342 673
    protected void firstPass(CubaImportState state) {
674
	    boolean isSynonym = false;
343 675

  
344
		CyprusRow taxonLight = null; //state.getCyprusRow();
345
		Reference<?> citation = null;
346
		String microCitation = null;
347

  
348
//		//species name
349
//		String speciesStr = taxonLight.getSpecies();
350
//		String subSpeciesStr = taxonLight.getSubspecies();
351
//		String homotypicSynonymsString = taxonLight.getHomotypicSynonyms();
352
//		List<String> homotypicSynonymList = Arrays.asList(homotypicSynonymsString.split(";"));
353
//		String heterotypicSynonymsString = taxonLight.getHeterotypicSynonyms();
354
//		List<String> heterotypicSynonymList = Arrays.asList(heterotypicSynonymsString.split(";"));
355
//
356
//		String systematicsString = taxonLight.getSystematics();
357
//		String endemismString = taxonLight.getEndemism();
358
//		String statusString = taxonLight.getStatus();
359
//		String redBookCategory = taxonLight.getRedDataBookCategory();
360
//
361
//		if (StringUtils.isNotBlank(speciesStr)) {
362
//			boolean speciesIsExisting = false;
363
//			Taxon mainTaxon = null;
364
//			//species
365
//			Taxon speciesTaxon = (Taxon)createTaxon(state, Rank.SPECIES(), speciesStr, Taxon.class, nc);
366
//			mainTaxon = speciesTaxon;
367
//
368
//			//subspecies
369
//			if (StringUtils.isNotBlank(subSpeciesStr)){
370
//				Taxon existingSpecies = state.getHigherTaxon(speciesStr);
371
//				if (existingSpecies != null){
372
//					speciesIsExisting = true;
373
//					speciesTaxon = existingSpecies;
374
//				}
375
//
376
//				Taxon subSpeciesTaxon = (Taxon)createTaxon(state, Rank.SUBSPECIES(), subSpeciesStr, Taxon.class, nc);
377
//
378
//				if (subSpeciesTaxon != null){
379
//					makeParent(state, speciesTaxon, subSpeciesTaxon, citation, microCitation);
380
//				}
381
//				mainTaxon = subSpeciesTaxon;
382
//				state.putHigherTaxon(speciesStr, speciesTaxon);
383
//			}
384
//
385
//			if (! speciesIsExisting){
386
//				makeHigherTaxa(state, taxonLight, speciesTaxon, citation, microCitation);
387
//			}
388
//			makeHomotypicSynonyms(state, citation, microCitation, homotypicSynonymList, mainTaxon);
389
//			makeHeterotypicSynonyms(state, citation, microCitation, heterotypicSynonymList, mainTaxon);
390
//			makeSystematics(systematicsString, mainTaxon);
391
//			makeEndemism(endemismString, mainTaxon);
392
//			makeStatus(statusString, mainTaxon);
393
//			makeRedBookCategory(redBookCategory, mainTaxon);
394
//
395
////			state.putHigherTaxon(higherName, uuid);//(speciesStr, mainTaxon);
396
//			getTaxonService().save(mainTaxon);
397
//		}
398
		return;
399
    }
676
        int line = state.getCurrentLine();
677
        HashMap<String, String> record = state.getOriginalRecord();
400 678

  
679
        Set<String> keys = record.keySet();
680
        for (String key: keys) {
681
            if (! expectedKeys.contains(key)){
682
                logger.warn("Unexpected Key: " + key);
683
            }
684
        }
401 685

  
402
	private void makeHigherTaxa(CubaImportState state, CyprusRow taxonLight, Taxon speciesTaxon, Reference citation, String microCitation) {
403
		String divisionStr = taxonLight.getDivision();
404
		String genusStr = taxonLight.getGenus();
405
		String familyStr = taxonLight.getFamily();
686
        if (record.get("Fam.") == null && keys.size() == 2 && record.get("Syn.") == null && record.get("Nat") != null && record.get("Adv") != null){
687
            //second header line, don't handle
688
            return;
689
        }
406 690

  
407
		Taxon division = getTaxon(state, divisionStr, Rank.DIVISION(), null, citation, microCitation);
408
		Taxon family = getTaxon(state, familyStr, Rank.FAMILY(), division, citation, microCitation);
409
		Taxon genus = getTaxon(state, genusStr, Rank.GENUS(), family, citation, microCitation);
410
		makeParent(state, genus, speciesTaxon, citation, microCitation)	;
411
	}
691
        //Fam.
692
        TaxonNode familyTaxon = getFamilyTaxon(record, state);
693
        if (familyTaxon == null){
694
            if (record.get("Taxón") != null){
695
                logger.warn(line + ": Family not recognized but taxon exists:" + record.get("Taxón"));
696
                return;
697
            }else if (record.get("Syn.") == null){
698
                logger.warn(line + ": Family not recognized but also no synonym exists");
699
                return;
700
            }else{
701
                isSynonym = true;
702
            }
703
        }
412 704

  
705
        //(Fam.)
706
        //TODO
413 707

  
414
	private Taxon getTaxon(CubaImportState state, String taxonNameStr, Rank rank, Taxon parent, Reference citation, String microCitation) {
415
		Taxon result;
416
		if (state.containsHigherTaxon(taxonNameStr)){
417
			result = state.getHigherTaxon(taxonNameStr);
418
		}else{
419
			result = (Taxon)createTaxon(state, rank, taxonNameStr, Taxon.class, nc);
420
			state.putHigherTaxon(taxonNameStr, result);
421
			if (parent == null){
422
				makeParent(state, null,result, citation, microCitation);
423
			}else{
424
				makeParent(state, parent, result, citation, microCitation);
425
			}
426

  
427
		}
428
		return result;
429
	}
708
        //Taxón
709
        Taxon taxon = makeTaxon(record, state, familyTaxon, isSynonym);
710
        if (taxon == null && ! isSynonym){
711
            logger.warn(line + ": taxon could not be created and is null");
712
            return;
713
        }
714
        state.setCurrentTaxon(taxon);
430 715

  
716
        //(Notas)
717
        makeNotes(record, state);
431 718

  
432
	private void makeHomotypicSynonyms(CubaImportState state,
433
			Reference citation, String microCitation, List<String> homotypicSynonymList, Taxon mainTaxon) {
434
		for (String homotypicSynonym: homotypicSynonymList){
435
			if (StringUtils.isNotBlank(homotypicSynonym)){
436
				Synonym synonym = (Synonym)createTaxon(state, null, homotypicSynonym, Synonym.class, nc);
437
				mainTaxon.addHomotypicSynonym(synonym, citation, microCitation);
438
			}
439
		}
440
	}
719
        //Syn.
720
        makeSynonyms(record, state);
441 721

  
722
        //End, Ind, Ind? D, Nat N, Dud P, Adv A, Cult C
723
        makeCubanDistribution(record, state);
442 724

  
443
	private void makeHeterotypicSynonyms(CubaImportState state, Reference citation, String microCitation, List<String> heterotypicSynonymList, Taxon mainTaxon) {
444
		for (String heterotypicSynonym: heterotypicSynonymList){
445
			if (StringUtils.isNotBlank(heterotypicSynonym)){
446
				Synonym synonym = (Synonym)createTaxon(state, null, heterotypicSynonym, Synonym.class, nc);
447
				mainTaxon.addSynonym(synonym, SynonymRelationshipType.HETEROTYPIC_SYNONYM_OF(), citation, microCitation);
448
			}
449
		}
450
	}
451 725

  
726
        // "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
727
//        "CuC","VC","Ci","SS","CA","Cam","LT",
728
//        "CuE","Gr","Ho","SC","Gu",
729
//      "Esp","Ja","PR","Men","Bah","Cay",
730
//      "AmN","AmC","AmS","VM"});
731
        makeProvincesDistribution(record, state);
452 732

  
453
	private void makeSystematics(String systematicsString, Taxon mainTaxon) {
454
		//Systematics
455
		if (StringUtils.isNotBlank(systematicsString)){
456
			TaxonDescription td = this.getTaxonDescription(mainTaxon, false, true);
457
			TextData textData = TextData.NewInstance(Feature.SYSTEMATICS());
458
			textData.putText(Language.UNDETERMINED(), systematicsString);
459
			td.addElement(textData);
460
		}
461
	}
733
		return;
734
    }
462 735

  
463 736

  
464
	private void makeEndemism(String endemismString, Taxon mainTaxon) {
465
		//endemism
466
		if (StringUtils.isNotBlank(endemismString)){
467
			//OLD - not wanted as marker
468
//			boolean flag;
469
//			if (endemismString.trim().equalsIgnoreCase("not endemic") || endemismString.trim().equalsIgnoreCase("ne?")){
470
//				flag = false;
471
//			}else if (endemismString.trim().equalsIgnoreCase("endemic")){
472
//				flag = true;
473
//			}else{
474
//				throw new RuntimeException(endemismString + " is not a valid value for endemism");
475
//			}
476
//			Marker marker = Marker.NewInstance(MarkerType.ENDEMIC(), flag);
477
//			mainTaxon.addMarker(marker);
478
			//text data
479
			TaxonDescription td = this.getTaxonDescription(mainTaxon, false, true);
480
			TextData textData = TextData.NewInstance(endemism);
481
			textData.putText(Language.ENGLISH(), endemismString);
482
			td.addElement(textData);
483
		}
484
	}
485 737

  
738
	/**
739
     * @param record
740
     * @param state
741
     * @param taxon
742
     */
743
    // "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
744
//  "CuC","VC","Ci","SS","CA","Cam","LT",
745
//  "CuE","Gr","Ho","SC","Gu",
746
    private void makeProvincesDistribution(HashMap<String, String> record, CubaImportState state) {
747
        List<String> areaKeys = Arrays.asList(new String[]{
748
                "CuW","PR PR*","Art","Hab(*)","May","Mat","IJ",
749
                "CuC","VC","Ci","SS","CA","Cam","LT",
750
                "CuE","Gr","Ho","SC","Gu",
751
                "Esp","Ja","PR","Men","Bah","Cay",
752
                "AmN","AmC","AmS","VM"});
753
        for (String areaKey : areaKeys){
754
            makeSingleProvinceDistribution(areaKey, record, state);
755
        }
756

  
757
    }
486 758

  
487
	private void makeRedBookCategory(String redBookCategory, Taxon mainTaxon) {
488
		//red data book category
489
		if (StringUtils.isNotBlank(redBookCategory)){
490
			TaxonDescription td = this.getTaxonDescription(mainTaxon, false, true);
491
			TextData textData = TextData.NewInstance(this.redBookCategory);
492
			textData.putText(Language.ENGLISH(), redBookCategory);
493
			td.addElement(textData);
494
		}
495
	}
759

  
760
    /**
761
     * @param areaKey
762
     * @param record
763
     * @param state
764
     * @param taxon
765
     */
766
    private void makeSingleProvinceDistribution(String areaKey,
767
            HashMap<String, String> record,
768
            CubaImportState state) {
769
        try {
770
            UUID areaUuid = state.getTransformer().getNamedAreaUuid(areaKey);
771
            if (areaUuid == null){
772
                logger.warn("Area not recognized: " + areaKey);
773
                return;
774
            }
775
            if (record.get(areaKey)==null){
776
                return; //no status defined
777
            }
778

  
779
            NamedArea area = getNamedArea(state, areaUuid, null, null, null, null, null);
780
            if (area == null){
781
                logger.warn(state.getCurrentLine() + ": Area not recognized: " + area);
782
            }
783
            TaxonDescription desc = getTaxonDescription(state.getCurrentTaxon(), false, true);
784
            PresenceAbsenceTerm status =  makeProvinceStatus(areaKey, record, state);
785
            if (status == null){
786
                logger.warn(state.getCurrentLine() + ": Distribution Status could not be defined: " + record.get(areaKey));
787
            }
788
            Distribution distribution = Distribution.NewInstance(area, status);
789
            desc.addElement(distribution);
790
        } catch (UndefinedTransformerMethodException e) {
791
            e.printStackTrace();
792
        }
793

  
794
    }
496 795

  
497 796

  
797
    /**
798
     * @param areaKey
799
     * @param record
800
     * @param state
801
     * @return
802
     * @throws UndefinedTransformerMethodException
803
     */
804
    private PresenceAbsenceTerm makeProvinceStatus(String areaKey, HashMap<String, String> record, CubaImportState state) throws UndefinedTransformerMethodException {
805
        String statusStr = record.get(areaKey);
806
        if (statusStr == null){
807
            return null;
808
        }
809
        PresenceAbsenceTerm status = state.getTransformer().getPresenceTermByKey(statusStr);
810
        if (status == null){
811
            UUID statusUuid = state.getTransformer().getPresenceTermUuid(statusStr);
812
            status = getPresenceTerm(state, statusUuid, null, null, null, false);
813
        }
814
        return status;
815
    }
498 816

  
499 817

  
500
	/**
818
    /**
501 819
	 *  Stores parent-child, synonym and common name relationships
502 820
	 */
503 821
	@Override
......
507 825
	}
508 826

  
509 827

  
510

  
511
	/**
512
	 * @param state
513
	 * @param rank
514
	 * @param taxonNameStr
515
	 * @param authorStr
516
	 * @param nameStatus
517
	 * @param nc
518
	 * @return
519
	 */
520
	private TaxonBase createTaxon(CubaImportState state, Rank rank, String taxonNameStr,
521
			Class statusClass, NomenclaturalCode nc) {
522
		TaxonBase taxonBase;
523
		NonViralName taxonNameBase = null;
524
		if (nc == NomenclaturalCode.ICVCN){
525
			logger.warn("ICVCN not yet supported");
526

  
527
		}else{
528
			taxonNameBase =(NonViralName) nc.getNewTaxonNameInstance(rank);
529
			//NonViralName nonViralName = (NonViralName)taxonNameBase;
530
			INonViralNameParser parser = nameParser;//NonViralNameParserImpl.NewInstance();
531
			taxonNameBase = (NonViralName<BotanicalName>)parser.parseFullName(taxonNameStr, nc, rank);
532

  
533
			//taxonNameBase.setNameCache(taxonNameStr);
534

  
535
		}
536

  
537
		//Create the taxon
538
		Reference sec = state.getConfig().getSourceReference();
539
		// Create the status
540
		if (statusClass.equals(Taxon.class)){
541
			taxonBase = Taxon.NewInstance(taxonNameBase, sec);
542
		}else if (statusClass.equals(Synonym.class)){
543
			taxonBase = Synonym.NewInstance(taxonNameBase, sec);
544
		}else {
545
			Taxon taxon = Taxon.NewInstance(taxonNameBase, sec);
546
			taxon.setTaxonStatusUnknown(true);
547
			taxonBase = taxon;
548
		}
549
		return taxonBase;
550
	}
551

  
552
	private boolean makeParent(CubaImportState state, Taxon parentTaxon, Taxon childTaxon, Reference citation, String microCitation){
553
		boolean success = true;
554
		Reference sec = state.getConfig().getSourceReference();
555

  
556
//		Reference sec = parentTaxon.getSec();
557
		Classification tree = state.getTree(sec);
558
		if (tree == null){
559
			tree = makeTree(state, sec);
560
			tree.setTitleCache(state.getConfig().getSourceReferenceTitle(), true);
561
		}
562
		if (sec.equals(childTaxon.getSec())){
563
			success &=  (null !=  tree.addParentChild(parentTaxon, childTaxon, citation, microCitation));
564
		}else{
565
			logger.warn("No relationship added for child " + childTaxon.getTitleCache());
566
		}
567
		return success;
568
	}
569

  
570

  
571 828
    @Override
572 829
    protected boolean isIgnore(CubaImportState state) {
573 830
        return ! state.getConfig().isDoTaxa();
app-import/src/main/java/eu/etaxonomy/cdm/io/cuba/CubaImportConfigurator.java
40 40

  
41 41
	private boolean isDoTaxa;
42 42

  
43
	private static IInputTransformer defaultTransformer = new CubaTransformer();
43
    private boolean doVocabularies;
44
    public void setDoVocabularies(boolean doVocabularies) {this.doVocabularies = doVocabularies;}
45
    public boolean isDoVocabularies() {return doVocabularies;}
46

  
47
    private static IInputTransformer defaultTransformer = new CubaTransformer();
44 48

  
45 49
	public static CubaImportConfigurator NewInstance(URI source, ICdmDataSource destination){
46 50
		return new CubaImportConfigurator(source, destination);
app-import/src/main/java/eu/etaxonomy/cdm/io/cuba/CubaImportState.java
18 18

  
19 19
import eu.etaxonomy.cdm.io.excel.common.ExcelImportState;
20 20
import eu.etaxonomy.cdm.io.excel.common.ExcelRowBase;
21
import eu.etaxonomy.cdm.model.reference.Reference;
21 22
import eu.etaxonomy.cdm.model.taxon.Classification;
22 23
import eu.etaxonomy.cdm.model.taxon.Taxon;
24
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
23 25

  
24 26
/**
25 27
 * @author a.mueller
......
30 32
	private static final Logger logger = Logger.getLogger(CubaImportState.class);
31 33

  
32 34
	private final Map<String, Taxon> higherTaxonTaxonMap = new HashMap<String, Taxon>();
33
	private final Map<String, UUID> higherTaxonUuidMap = new HashMap<String, UUID>();
34 35

  
35
//	private CyprusRow cyprusRow;
36
//	private CyprusDistributionRow cyprusDistributionRow;
36
	private final Map<String, UUID> higherTaxonUuidMap = new HashMap<String, UUID>();
37 37

  
38
	//classification
38 39
	private Classification classification;
40
    public Classification getClassification() {return classification;}
41
    public void setClassification(Classification classification) {this.classification = classification;}
39 42

  
43
    //current taxon
44
    private Taxon currentTaxon;
45
    public Taxon getCurrentTaxon() {return currentTaxon;}
46
    public void setCurrentTaxon(Taxon currentTaxon) {this.currentTaxon = currentTaxon;}
40 47

  
41
	public CubaImportState(CubaImportConfigurator config) {
42
		super(config);
43
	}
48
    //rootNode
49
    private TaxonNode rootNode;
50
    public void setRootNode(TaxonNode rootNode) {this.rootNode = rootNode;}
51
    public TaxonNode getRootNode() { return rootNode;}
52

  
53
    private Reference<?> secReference;
54
    public Reference<?> getSecReference() {return secReference;}
55
    public void setSecReference(Reference<?> secReference) {this.secReference = secReference;}
44 56

  
45 57

  
46
	public boolean containsHigherTaxon(String higherName) {
47
		return higherTaxonTaxonMap.containsKey(higherName);
58
    //Constructor
59
    public CubaImportState(CubaImportConfigurator config) {
60
		super(config);
48 61
	}
49 62

  
63

  
64
    //higher taxon
65
    public Taxon getHigherTaxon(String higherName) {
66
        return higherTaxonTaxonMap.get(higherName);
67
    }
50 68
	public Taxon putHigherTaxon(String higherName, Taxon taxon) {
51 69
		return higherTaxonTaxonMap.put(higherName, taxon);
52 70
	}
53

  
54 71
	public Taxon removeHigherTaxon(String higherName) {
55 72
		return higherTaxonTaxonMap.remove(higherName);
56 73
	}
74
    public boolean containsHigherTaxon(String higherName) {
75
        return higherTaxonTaxonMap.containsKey(higherName);
76
    }
57 77

  
58
	public Taxon getHigherTaxon(String higherName) {
59
		return higherTaxonTaxonMap.get(higherName);
60
	}
61

  
62

  
63
	public boolean containsHigherTaxonUuid(String higherName) {
64
		return higherTaxonUuidMap.containsKey(higherName);
65
	}
66

  
78
    //higher taxon uuid
79
    public UUID getHigherTaxonUuid(String higherName) {
80
        return higherTaxonUuidMap.get(higherName);
81
    }
67 82
	public UUID putHigherTaxon(String higherName, UUID uuid) {
68 83
		return higherTaxonUuidMap.put(higherName, uuid);
69 84
	}
70

  
71 85
	public UUID removeHigherTaxonUuid(String higherName) {
72 86
		return higherTaxonUuidMap.remove(higherName);
73 87
	}
74

  
75
	public UUID getHigherTaxonUuid(String higherName) {
76
		return higherTaxonUuidMap.get(higherName);
77
	}
78

  
79

  
80
    /**
81
     * @return
82
     */
83
    public Classification getClassification() {
84
        return classification;
88
    public boolean containsHigherTaxonUuid(String higherName) {
89
        return higherTaxonUuidMap.containsKey(higherName);
85 90
    }
86 91

  
87

  
88
    /**
89
     * @param classification the classification to set
90
     */
91
    public void setClassification(Classification classification) {
92
        this.classification = classification;
93
    }
94

  
95

  
96

  
97

  
98

  
99
//	/**
100
//	 * @return the cyprusRow
101
//	 */
102
//	public CyprusRow getCyprusRow() {
103
//		return cyprusRow;
104
//	}
105
//
106
//	/**
107
//	 * @param cyprusRow the normalExplicitRow to set
108
//	 */
109
//	public void setCyprusRow(CyprusRow cyprusRow) {
110
//		this.cyprusRow = cyprusRow;
111
//	}
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff