Project

General

Profile

Download (11.4 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2009 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.dwca.in;
10

    
11
import java.util.ArrayList;
12
import java.util.HashSet;
13
import java.util.List;
14
import java.util.Map;
15
import java.util.Set;
16
import java.util.UUID;
17

    
18
import org.apache.log4j.Logger;
19

    
20
import eu.etaxonomy.cdm.common.CdmUtils;
21
import eu.etaxonomy.cdm.io.common.TdwgAreaProvider;
22
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
23
import eu.etaxonomy.cdm.io.stream.IPartitionableConverter;
24
import eu.etaxonomy.cdm.io.stream.IReader;
25
import eu.etaxonomy.cdm.io.stream.ListReader;
26
import eu.etaxonomy.cdm.io.stream.MappedCdmBase;
27
import eu.etaxonomy.cdm.io.stream.PartitionableConverterBase;
28
import eu.etaxonomy.cdm.io.stream.StreamItem;
29
import eu.etaxonomy.cdm.io.stream.terms.TermUri;
30
import eu.etaxonomy.cdm.model.common.CdmBase;
31
import eu.etaxonomy.cdm.model.description.Distribution;
32
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
33
import eu.etaxonomy.cdm.model.description.TaxonDescription;
34
import eu.etaxonomy.cdm.model.location.NamedArea;
35
import eu.etaxonomy.cdm.model.reference.Reference;
36
import eu.etaxonomy.cdm.model.taxon.Taxon;
37
import eu.etaxonomy.cdm.model.term.TermVocabulary;
38

    
39
/**
40
 * @author a.mueller
41
 * @since 22.11.2011
42
 *
43
 */
44
public class GbifDistributionCsv2CdmConverter extends PartitionableConverterBase<DwcaDataImportConfiguratorBase, DwcaDataImportStateBase<DwcaDataImportConfiguratorBase>>
45
						implements IPartitionableConverter<StreamItem, IReader<CdmBase>, String>{
46

    
47
	@SuppressWarnings("unused")
48
	private static final Logger logger = Logger.getLogger(GbifDistributionCsv2CdmConverter.class);
49

    
50
	private static final String CORE_ID = "coreId";
51

    
52
	/**
	 * Creates a converter working on the given import state.
	 *
	 * @param state the import state; handed over unchanged to the base class constructor
	 */
	public GbifDistributionCsv2CdmConverter(DwcaDataImportStateBase state) {
		super(state);
	}
58

    
59
	@Override
60
    public IReader<MappedCdmBase<? extends CdmBase>> map(StreamItem item ){
61
		List<MappedCdmBase<? extends CdmBase>> resultList = new ArrayList<>();
62

    
63
		Map<String, String> csv = item.map;
64
		Reference sourceReference = state.getTransactionalSourceReference();
65
		String sourceReferecenDetail = null;
66

    
67
		String id = getSourceId(item);
68
		Taxon taxon = getTaxonBase(id, item, Taxon.class, state);
69
		if (taxon != null){
70

    
71
		    //area
72
		    String locality = item.get(TermUri.DWC_LOCALITY);
73
			String locationId = item.get(TermUri.DWC_LOCATION_ID);
74
			NamedArea area = getAreaByLocationId(item, locationId, locality, resultList);
75
			if (area != null){
76
				MappedCdmBase<? extends CdmBase>  mcb = new MappedCdmBase<>(TermUri.DWC_LOCATION_ID, csv.get(TermUri.DWC_LOCATION_ID.toString()), area);
77
				resultList.add(mcb);
78
			}else if (! config.isExcludeLocality()){
79
				area = getAreaByLocality(item, locality);
80
				MappedCdmBase<? extends CdmBase>  mcb = new MappedCdmBase<>(TermUri.DWC_LOCALITY, csv.get(TermUri.DWC_LOCALITY.toString()), area);
81
				resultList.add(mcb);
82
			}
83

    
84
			//status
85
			String establishmentMeans = item.get(TermUri.DWC_ESTABLISHMENT_MEANS);
86
	        String occurrenceStatus = item.get(TermUri.DWC_OCCURRENCE_STATUS);
87
			PresenceAbsenceTerm status = getPresenceAbsenceStatus(item, establishmentMeans, occurrenceStatus, resultList);
88

    
89
			if (area != null){
90

    
91
				TaxonDescription desc = getTaxonDescription(taxon, false);
92

    
93
				Distribution distribution = Distribution.NewInstance(area, status);
94
				desc.addElement(distribution);
95

    
96
				//save taxon
97
				MappedCdmBase<? extends CdmBase>  mcb = new MappedCdmBase<>(item.term, csv.get(CORE_ID.toString()), taxon);
98
				resultList.add(mcb);
99
			}
100

    
101
		}else{
102
			String message = "Can't retrieve taxon from database for id '%s'";
103
			fireWarningEvent(String.format(message, id), item, 12);
104
		}
105

    
106
		//return
107
		return new ListReader<>(resultList);
108
	}
109

    
110

    
111

    
112
	/**
113
     * @param item
114
	 * @param occurrenceStatus
115
	 * @param establishmentMeans
116
     * @param resultList
117
     * @return
118
     */
119
    private PresenceAbsenceTerm getPresenceAbsenceStatus(StreamItem item,
120
            String establishmentMeans, String occurrenceStatus, List<MappedCdmBase<? extends CdmBase>> resultList) {
121

    
122
        PresenceAbsenceTerm status = null;
123
        if (isBlank(establishmentMeans) && isBlank(occurrenceStatus)){
124
            status = PresenceAbsenceTerm.PRESENT();
125
        }else{
126
            String statusStr = CdmUtils.concat(" - ", occurrenceStatus, establishmentMeans);
127
            String namespace = PresenceAbsenceTerm.class.getCanonicalName();
128
            List<PresenceAbsenceTerm> result = state.get(namespace, statusStr, PresenceAbsenceTerm.class);
129
            try{
130
                if (result.isEmpty()){
131
                    PresenceAbsenceTerm newStatus = state.getTransformer().getPresenceTermByKey(statusStr);
132
                    if (newStatus != null){
133
                        return newStatus;
134
                    }
135
                    //try to find in cdm
136
                    newStatus = getExistingPresenceAbsenceTerm(statusStr);
137
                    if (newStatus != null){
138
                        return newStatus;
139
                    }
140

    
141
                    UUID statusUuid = state.getTransformer().getPresenceTermUuid(statusStr);
142
                    newStatus = state.getCurrentIO().getPresenceTerm(state, statusUuid, statusStr, statusStr, null, false);
143

    
144
                    //should not happen
145
                    if (newStatus == null){
146
                        newStatus = PresenceAbsenceTerm.NewPresenceInstance(statusStr, statusStr, statusStr);
147
                        state.getCurrentIO().saveNewTerm(newStatus);
148
                        MappedCdmBase<? extends CdmBase>  mcb = new MappedCdmBase<>(namespace, statusStr, newStatus);
149
                        resultList.add(mcb);
150
                    }
151

    
152
                    state.putMapping(namespace, statusStr, newStatus);
153
                    return newStatus;
154
                }
155
                if (result.size() > 1){
156
                    String message = "There is more than 1 cdm entity matching given occurrence status/establishment means '%s'."
157
                            + " I take an arbitrary one.";
158
                    fireWarningEvent(String.format(message, statusStr), item, 4);
159
                }
160
                return result.iterator().next();
161
            } catch (UndefinedTransformerMethodException e) {
162
                String message = "GetNamedArea not yet supported by DwcA-Transformer. This should not have happend. Please contact your application developer.";
163
                fireWarningEvent(message, item, 8);
164
                return null;
165
            }
166
        }
167
        return status;
168
    }
169

    
170
    /**
171
     * @param statusStr
172
     * @return
173
     */
174
    private PresenceAbsenceTerm getExistingPresenceAbsenceTerm(String statusStr) {
175
        TermVocabulary<PresenceAbsenceTerm> voc = PresenceAbsenceTerm.PRESENT().getVocabulary();
176
        for (PresenceAbsenceTerm status: voc.getTerms()){
177
            if (statusStr.equalsIgnoreCase(status.getLabel())){
178
                return status;
179
            }
180
        }
181
        return null;
182
    }
183

    
184
    private NamedArea getAreaByLocality(StreamItem item, String locality) {
185
		String namespace = TermUri.DWC_LOCALITY.toString();
186
		List<NamedArea> result = state.get(namespace, locality, NamedArea.class);
187
		if (result.isEmpty()){
188
			NamedArea newArea = NamedArea.NewInstance(locality, locality, locality);
189
			newArea.setTitleCache(locality, true);
190
			return newArea;
191
		}
192
		if (result.size() > 1){
193
			String message = "There is more than 1 cdm entity matching given locality '%s'. I take an arbitrary one.";
194
			fireWarningEvent(String.format(message, locality), item, 4);
195
		}
196
		return result.iterator().next();
197
	}
198

    
199
	private NamedArea getAreaByLocationId(StreamItem item, String locationId, String newLabel, List<MappedCdmBase<? extends CdmBase>> resultList) {
200
		String namespace = TermUri.DWC_LOCATION_ID.toString();
201
		if (isBlank(locationId)){
202
		    return null;
203
		}
204
		List<NamedArea> result = state.get(namespace, locationId, NamedArea.class);
205
		try{
206
    		if (result.isEmpty()){
207
    		    NamedArea newArea = state.getTransformer().getNamedAreaByKey(locationId);
208
    		    if (newArea != null){
209
                    return newArea;
210
                }
211
    		  //try to find in cdm
212
                newArea = getTdwgArea(locationId);
213
                if (newArea != null){
214
                    return newArea;
215
                }
216

    
217
    		    String label = isNotBlank(newLabel)? newLabel : locationId;
218
    		    UUID namedAreaUuid = state.getTransformer().getNamedAreaUuid(locationId);
219
    		    newArea = state.getCurrentIO().getNamedArea(state, namedAreaUuid, label, label, locationId, null);
220

    
221
    		    //should not happen
222
    		    if (newArea == null){
223
    		        newArea = NamedArea.NewInstance(label, label, locationId);
224
    //            state.putMapping(namespace, type, newArea);
225
                    state.getCurrentIO().saveNewTerm(newArea);
226
                    MappedCdmBase<? extends CdmBase>  mcb = new MappedCdmBase<>(namespace, locationId, newArea);
227
                    resultList.add(mcb);
228
                }
229

    
230

    
231
    			state.putMapping(namespace, locationId, newArea);
232
    			return newArea;
233
    		}
234
    		if (result.size() > 1){
235
    			String message = "There is more than 1 cdm entity matching given locationId '%s'. I take an arbitrary one.";
236
    			fireWarningEvent(String.format(message, locationId), item, 4);
237
    		}
238
    		return result.iterator().next();
239
        } catch (UndefinedTransformerMethodException e) {
240
            String message = "GetNamedArea not yet supported by DwcA-Transformer. This should not have happend. Please contact your application developer.";
241
            fireWarningEvent(message, item, 8);
242
            return null;
243
        }
244
	}
245

    
246
    /**
247
     * @param locationId
248
     * @return
249
     */
250
    protected NamedArea getTdwgArea(String locationId) {
251
        if (locationId == null){
252
            return null;
253
        }else if (locationId.startsWith("TDWG:")){
254
            locationId = locationId.substring("TDWG:".length()); //CoL case
255
        }
256
        return TdwgAreaProvider.getAreaByTdwgAbbreviation(locationId);
257
    }
258

    
259
	@Override
260
	public String getSourceId(StreamItem item) {
261
		String id = item.get(CORE_ID);
262
		return id;
263
	}
264

    
265

    
266
//********************** PARTITIONABLE **************************************/
267

    
268
	@Override
269
	protected void makeForeignKeysForItem(StreamItem item, Map<String, Set<String>> fkMap) {
270
		String value;
271
		String key;
272
		//taxon
273
		if ( hasValue(value = item.get(CORE_ID))){
274
			key = TermUri.DWC_TAXON.toString();
275
			Set<String> keySet = getKeySet(key, fkMap);
276
			keySet.add(value);
277
		}
278

    
279
		//areaId
280

    
281
		String locationId = item.get(TermUri.DWC_LOCATION_ID);
282
		if ( hasValue(value = locationId)){
283
			key = TermUri.DWC_LOCATION_ID.toString();
284
			Set<String> keySet = getKeySet(key, fkMap);
285
			keySet.add(value);
286
		}
287

    
288
	}
289

    
290

    
291
	@Override
292
	public Set<String> requiredSourceNamespaces() {
293
		Set<String> result = new HashSet<>();
294
 		result.add(TermUri.DWC_TAXON.toString());
295
 		result.add(TermUri.DWC_LOCATION_ID.toString());
296
 		result.add(TermUri.DWC_LOCALITY.toString());
297
 		return result;
298
	}
299

    
300
//******************* TO STRING ******************************************/
301

    
302
	@Override
303
	public String toString(){
304
		return this.getClass().getName();
305
	}
306

    
307

    
308
}
(13-13/17)