Project

General

Profile

« Previous | Next » 

Revision b3273569

Added by Andreas Müller about 4 years ago

ref #1447 implement PESI commandline merger

View differences:

cdm-pesi/src/main/java/eu/etaxonomy/cdm/app/pesi/merging/FaunaEuErmsMergeActivator.java
1 1
package eu.etaxonomy.cdm.app.pesi.merging;
2 2

  
3
import java.io.BufferedReader;
4
import java.io.File;
5
import java.io.FileReader;
6
import java.io.IOException;
7 3
import java.util.ArrayList;
8 4
import java.util.HashSet;
9 5
import java.util.Iterator;
10 6
import java.util.List;
11 7
import java.util.Set;
12
import java.util.StringTokenizer;
13 8
import java.util.UUID;
14 9

  
15 10
import org.apache.log4j.Logger;
......
39 34
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
40 35
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
41 36

  
42
public class FaunaEuErmsMergeActivator {
37
public class FaunaEuErmsMergeActivator extends PesiMergeBase{
43 38

  
44
//	static final ICdmDataSource faunaEuropaeaSource = CdmDestinations.cdm_test_patricia();
45 39
	static final ICdmDataSource faunaEuropaeaSource = CdmDestinations.localH2();
46 40

  
47 41
	static final int faunaEuUuid = 0;
48 42
	static final int ermsUuid = 9;
49 43
	static final int rankFaunaEu = 4;
50 44
	static final int rankErms = 13;
51
	Classification faunaEuClassification;
52
	Classification ermsClassification;
45
	private Classification faunaEuClassification;
46
	private Classification ermsClassification;
53 47

  
54
	CdmApplicationController appCtrInit;
48
	private CdmApplicationController appCtrInit;
55 49

  
56 50
	private static final Logger logger = Logger.getLogger(FaunaEuErmsMergeActivator.class);
57 51

  
......
73 67

  
74 68
		//set the ranks of Agnatha and Gnathostomata to 50 instead of 45
75 69
		List<TaxonBase> taxaToChangeRank = new ArrayList<>();
70

  
76 71
		Pager<TaxonBase> agnatha = sc.appCtrInit.getTaxonService().findTaxaByName(TaxonBase.class, "Agnatha", null, null, null, "*", Rank.INFRAPHYLUM(), 10, 0, null);
77 72
		List<TaxonBase> agnathaList = agnatha.getRecords();
78 73
		taxaToChangeRank.addAll(agnathaList);
74

  
79 75
		Pager<TaxonBase> gnathostomata = sc.appCtrInit.getTaxonService().findTaxaByName(TaxonBase.class, "Gnathostomata", null, null, null, "*", Rank.INFRAPHYLUM(), 10, 0, null);
80 76
		List<TaxonBase> gnathostomataList = gnathostomata.getRecords();
81 77
		taxaToChangeRank.addAll(gnathostomataList);
82 78

  
83 79
		sc.setSpecificRank(taxaToChangeRank, Rank.SUPERCLASS());
84 80

  
85
		//ermsTaxon is accepted, fauna eu taxon is synonym
81
		//ermsTaxon is accepted, faunaEu taxon is synonym
86 82
		//ermsTaxon is synonym, faunaEu is accepted
87 83

  
88 84
		sc.mergeDiffStatus();
......
93 89

  
94 90
	}
95 91

  
96
	private static List<List<String>> readCsvFile(String fileName){
97

  
98
		List<List<String>> result = new ArrayList<>();
99
		File file = new File(fileName);
100
		BufferedReader bufRdr;
101
		try {
102
			bufRdr = new BufferedReader(new FileReader(file));
103
			String line = null;
104
			//read each line of text file
105
			while((line = bufRdr.readLine()) != null){
106
				StringTokenizer st = new StringTokenizer(line,",");
107
				List<String> rowList = new ArrayList<>();
108
				while (st.hasMoreTokens()){
109
					//get next token and store it in the array
110
					rowList.add(st.nextToken());
111
				}
112
			result.add(rowList);
113
			}
114
			//close the file
115
			bufRdr.close();
116
		} catch (IOException e) {
117
			e.printStackTrace();
118
		}
119
		return result;
120
	}
121

  
122

  
123 92
	private void mergeAuthors(){
124 93
		List<List<String>> authors = readCsvFile(sFileName + "_authors.csv");
125 94
		//authors: get firstAuthor if isFauEu = 1 otherwise get secondAuthor
126 95

  
127 96
		Iterator<List<String>> authorIterator = authors.iterator();
128
		List<String> row;
129
		TaxonBase<?> taxonFaunaEu;
130
		TaxonBase<?> taxonErms;
131
		List<TaxonBase<?>> taxaToSave = new ArrayList<>();
97
		List<TaxonBase<?>> taxaToSave = new ArrayList<>();  //TODO: needed?
132 98
		while (authorIterator.hasNext()){
133
			row = authorIterator.next();
99
		    List<String> row = authorIterator.next();
134 100
			UUID uuidFaunaEu = UUID.fromString(row.get(faunaEuUuid));
135 101
			UUID uuidErms = UUID.fromString(row.get(ermsUuid));
136
			taxonFaunaEu = appCtrInit.getTaxonService().find(uuidFaunaEu);
137
			taxonErms = appCtrInit.getTaxonService().find(uuidErms);
102
			TaxonBase<?> taxonFaunaEu = appCtrInit.getTaxonService().find(uuidFaunaEu);
103
			TaxonBase<?> taxonErms = appCtrInit.getTaxonService().find(uuidErms);
138 104
// which information should be used can be found in last row -> needs to be done manually
139 105
			if (Integer.parseInt(row.get(18)) == 1){
140 106
				//isFaunaEu = 1 -> copy the author of Fauna Europaea to Erms
......
169 135
		List<List<String>> diffStatus = readCsvFile(sFileName + "_status.csv");
170 136

  
171 137
		//find all taxa accepted in erms, but synonyms in FauEu  and the same rank
172
		List<List<String>> accErmsSynFaunaEu = new ArrayList<List<String>>();
138
		List<List<String>> accErmsSynFaunaEu = new ArrayList<>();
173 139
		for (List<String> rowList: diffStatus){
174 140
			if ((rowList.get(5).equals("synonym")) && (rowList.get(rankFaunaEu).equals(rowList.get(rankErms)))){
175 141
				//both conditions are true
cdm-pesi/src/main/java/eu/etaxonomy/cdm/app/pesi/merging/PesiCommandLineMerge.java
1
/**
2
* Copyright (C) 2020 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.app.pesi.merging;
10

  
11
import java.util.Optional;
12
import java.util.Set;
13
import java.util.UUID;
14
import java.util.stream.Collectors;
15

  
16
import org.apache.log4j.Logger;
17
import org.springframework.transaction.TransactionStatus;
18

  
19
import eu.etaxonomy.cdm.api.service.DeleteResult;
20
import eu.etaxonomy.cdm.api.service.config.TaxonDeletionConfigurator;
21
import eu.etaxonomy.cdm.app.common.CdmDestinations;
22
import eu.etaxonomy.cdm.common.CdmRegEx;
23
import eu.etaxonomy.cdm.common.CdmUtils;
24
import eu.etaxonomy.cdm.database.DbSchemaValidation;
25
import eu.etaxonomy.cdm.database.ICdmDataSource;
26
import eu.etaxonomy.cdm.io.api.application.CdmIoApplicationController;
27
import eu.etaxonomy.cdm.io.common.mapping.out.DbLastActionMapper;
28
import eu.etaxonomy.cdm.io.pesi.out.PesiTransformer;
29
import eu.etaxonomy.cdm.model.common.Annotation;
30
import eu.etaxonomy.cdm.model.common.CdmBase;
31
import eu.etaxonomy.cdm.model.common.Credit;
32
import eu.etaxonomy.cdm.model.common.Extension;
33
import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
34
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
35
import eu.etaxonomy.cdm.model.common.Marker;
36
import eu.etaxonomy.cdm.model.description.TaxonDescription;
37
import eu.etaxonomy.cdm.model.name.TaxonName;
38
import eu.etaxonomy.cdm.model.taxon.Synonym;
39
import eu.etaxonomy.cdm.model.taxon.Taxon;
40
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
41
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
42
import eu.etaxonomy.cdm.model.taxon.TaxonRelationship;
43

  
44
/**
45
 * @author a.mueller
46
 * @since 20.01.2020
47
 */
48
public class PesiCommandLineMerge extends PesiMergeBase {
49

  
50
    private static final Logger logger = Logger.getLogger(PesiCommandLineMerge.class);
51

  
52
    static final ICdmDataSource pesiSource = CdmDestinations.cdm_pesi2019_final();
53

  
54
    private CdmIoApplicationController app;
55

  
56
    private void invoke(ICdmDataSource source){
57
        app = CdmIoApplicationController.NewInstance(source, DbSchemaValidation.VALIDATE, false);
58

  
59
        while(booleanAnswer("New merge")){
60
            TransactionStatus tx = app.startTransaction();
61
            Taxon[] taxa = null;
62
            while (taxa == null) {
63
                taxa = readTaxa();
64
            }
65
            boolean commit = compareTaxa(taxa);
66
            if (commit){
67
                moveTaxonInformation(taxa[0],taxa[1]);
68
            }
69
            if (commit){
70
                app.commitTransaction(tx);
71
                if (booleanAnswer("Information moved. Delete old taxon")){
72
                    removeTaxon(taxa[0]);
73
                }
74
            }else{
75
                app.rollbackTransaction(tx);
76
            }
77
        }
78
    }
79

  
80
    private boolean compareTaxa(Taxon[] taxa) {
81
        Taxon removeTaxon = taxa[0];
82
        Taxon stayTaxon = taxa[1];
83
        String nc1 = removeTaxon.getName().getNameCache();
84
        String nc2 = stayTaxon.getName().getNameCache();
85

  
86
        String ft1 = removeTaxon.getName().getFullTitleCache();
87
        String ft2 = stayTaxon.getName().getFullTitleCache();
88
        System.out.println("Remove: " + ft1);
89
        System.out.println("Stay  : " + ft2);
90
        if (!nc1.equals(nc2)){
91
            return booleanAnswer("Name Cache differs!!! Do you really want to merge???");
92
        }else if (!ft1.equals(ft2)){
93
            return booleanAnswer("Full title cache differs! Do you really want to merge anyway");
94
        }else{
95
            return booleanAnswer("Same title. Merge");
96
        }
97
    }
98

  
99
    private void removeTaxon(Taxon taxon) {
100
        TaxonNode nodeToRemove = taxon.getTaxonNodes().iterator().next();
101
        TaxonDeletionConfigurator config = new TaxonDeletionConfigurator();
102
        DeleteResult result = app.getTaxonNodeService().deleteTaxonNode(nodeToRemove.getUuid(), config);
103
        if (!result.isOk()){
104
            System.out.println("Remove taxon was not successful.");
105
        }
106
    }
107

  
108
    private boolean booleanAnswer(String message) {
109
        String answer = CdmUtils.readInputLine(message + " (y/n)? ");
110
        return answer.equalsIgnoreCase("y");
111
    }
112

  
113
    private boolean moveTaxonInformation(Taxon removeTaxon, Taxon stayTaxon) {
114
        try {
115
            //mergeNames;
116
            TaxonName removeName = CdmBase.deproxy(removeTaxon.getName());
117
            TaxonName stayName = CdmBase.deproxy(stayTaxon.getName());
118
            mergeSources(removeName, stayName);
119
            mergeAnnotations(removeName, stayName);
120
            mergeMarkers(removeName, stayName);
121
            mergeExtensions(removeName, stayName);
122
            mergeCredits(removeName, stayName);
123
            mergeNameRelationships(removeName, stayName);
124
            mergeHybridRelationships(removeName, stayName);
125
            mergeNameDescriptions(removeName, stayName);
126

  
127
            //mergeTaxa;
128
            mergeSources(removeTaxon, stayTaxon);
129
            mergeAnnotations(removeTaxon, stayTaxon);
130
            mergeMarkers(removeTaxon, stayTaxon);
131
            mergeExtensions(removeTaxon, stayTaxon);
132
            mergeCredits(removeTaxon, stayTaxon);
133
            mergeDescriptions(removeTaxon, stayTaxon);
134
            mergeSynonyms(removeTaxon, stayTaxon);
135
            mergeChildren(removeTaxon, stayTaxon);
136
            mergeTaxonRelations(removeTaxon, stayTaxon);
137
            return booleanAnswer("Commit moved information");
138
        } catch (CloneNotSupportedException e) {
139
            e.printStackTrace();
140
            return false;
141
        }
142
    }
143

  
144
    private void mergeTaxonRelations(Taxon removeTaxon, Taxon stayTaxon) {
145
        for (TaxonRelationship rel : removeTaxon.getRelationsToThisTaxon()){
146
            System.out.println("Move taxon relationship: " + rel.getType().getTitleCache() + ": " + rel.getFromTaxon().getTitleCache());
147

  
148
            rel.setToTaxon(stayTaxon);
149
//            if (!synonymExists()){
150
//                //TODO homotypical group
151
//                stayTaxon.addSynonym(synonym, synonym.getType());
152
//            }else{
153
//                //TODO merge synonym names
154
//            }
155
        }
156
        if(!removeTaxon.getRelationsFromThisTaxon().isEmpty()){
157
            logger.warn("Taxon-from-relations not yet implemented");
158
        }
159
    }
160

  
161
    private void mergeNameDescriptions(TaxonName removeName, @SuppressWarnings("unused") TaxonName stayName) {
162
        if(!removeName.getDescriptions().isEmpty()){
163
            logger.warn("Name description exist but merge not yet implemented");
164
        }
165
    }
166

  
167
    private void mergeHybridRelationships(TaxonName removeName, @SuppressWarnings("unused") TaxonName stayName) {
168
        if(!removeName.getHybridChildRelations().isEmpty()){
169
            logger.warn("Hybrid child relation exist but merge not yet implemented");
170
        }
171
        if(!removeName.getHybridParentRelations().isEmpty()){
172
            logger.warn("Hybrid parent relation exist but merge not yet implemented");
173
        }
174
    }
175

  
176
    private void mergeNameRelationships(TaxonName removeName, @SuppressWarnings("unused") TaxonName stayName) {
177
        if(!removeName.getNameRelations().isEmpty()){
178
            logger.warn("Name relations exist but merge not yet implemented");
179
        }
180
    }
181

  
182
    private void mergeChildren(Taxon removeTaxon, Taxon stayTaxon) {
183
        TaxonNode removeNode = removeTaxon.getTaxonNodes().iterator().next();
184
        TaxonNode stayNode = stayTaxon.getTaxonNodes().iterator().next();
185
        Set<UUID> removeNodeChildrenUuids = removeNode.getChildNodes()
186
                .stream().map(tn->tn.getUuid()).collect(Collectors.toSet());
187

  
188
        if(!removeNodeChildrenUuids.isEmpty()){
189
            app.getTaxonNodeService().moveTaxonNodes(removeNodeChildrenUuids,
190
                    stayNode.getUuid(), 0, null);
191
            System.out.println("Child nodes moved: " + removeNodeChildrenUuids.size());
192

  
193
        }
194
    }
195

  
196
    private void mergeSynonyms(Taxon removeTaxon, Taxon stayTaxon) {
197
        for (Synonym synonym : removeTaxon.getSynonyms()){
198
            if (!synonymExists()){
199
                //TODO homotypical group
200
                stayTaxon.addSynonym(synonym, synonym.getType());
201
            }else{
202
                //TODO merge synonym names
203
            }
204
        }
205
    }
206

  
207
    private boolean synonymExists() {
208
        logger.warn("Synonym dulicate check - not yet implemented");
209
        return false;
210
    }
211

  
212
    private void mergeDescriptions(Taxon remove, Taxon stay) {
213
        //TODO handle duplicates for taxon descriptions
214
        for (TaxonDescription description: remove.getDescriptions()){
215
            System.out.println("Move taxon description: " + description.getTitleCache());
216
            stay.addDescription((TaxonDescription)description.clone());
217
        }
218
    }
219

  
220
    private void mergeCredits(IdentifiableEntity<?> removeEntity,
221
            IdentifiableEntity<?> stayEntity) throws CloneNotSupportedException {
222
        String className = removeEntity.getClass().getSimpleName();
223
        for (Credit credit: removeEntity.getCredits()){
224
            System.out.println("Move "+className+" credit: " + credit.toString());
225
            stayEntity.addCredit((Credit)credit.clone());
226
        }
227
    }
228

  
229
    private void mergeExtensions(IdentifiableEntity<?> removeEntity,
230
            IdentifiableEntity<?> stayEntity) throws CloneNotSupportedException {
231
        String className = removeEntity.getClass().getSimpleName();
232
        for (Extension extension: removeEntity.getExtensions()){
233
            System.out.println("Move "+className+" extension: " + extension.getType().getTitleCache() + ": " + extension.getValue());
234
            stayEntity.addExtension((Extension)extension.clone());
235
        }
236
    }
237

  
238
    private void mergeMarkers(IdentifiableEntity<?> removeEntity,
239
            IdentifiableEntity<?> stayEntity) throws CloneNotSupportedException {
240
        String className = removeEntity.getClass().getSimpleName();
241
        for (Marker marker: removeEntity.getMarkers()){
242
            if (!filterMarker(marker, removeEntity, stayEntity)){
243
                System.out.println("Move "+className+" marker: " + marker.getMarkerType().getTitleCache() + ": " + marker.getValue());
244
                stayEntity.addMarker((Marker)marker.clone());
245
            }
246
        }
247
    }
248

  
249
    private void mergeAnnotations(IdentifiableEntity<?> removeEntity,
250
            IdentifiableEntity<?> stayEntity) throws CloneNotSupportedException {
251
        String className = removeEntity.getClass().getSimpleName();
252
        for (Annotation annotation: removeEntity.getAnnotations()){
253
            if (!filterAnnotation(annotation, removeEntity, stayEntity)){
254
                System.out.println("Move "+className+" note: " + annotation.getAnnotationType().getTitleCache() + ": " + annotation.getText());
255
                handleRemoveAnnotation(annotation, removeEntity, stayEntity);
256
                stayEntity.addAnnotation((Annotation)annotation.clone());
257
            }
258
        }
259
    }
260

  
261
    private void mergeSources(IdentifiableEntity<?> removeEntity,
262
            IdentifiableEntity<?> stayEntity) throws CloneNotSupportedException {
263
        String className = removeEntity.getClass().getSimpleName();
264
        for (IdentifiableSource source: removeEntity.getSources()){
265
            System.out.println("Move "+className+" source: " + source.getType().getMessage() + ": " + source.getCitation().getTitleCache() + "; " + source.getIdInSource() + "/" + source.getIdNamespace());
266
            stayEntity.addSource((IdentifiableSource)source.clone());
267
        }
268
    }
269

  
270
    private boolean filterMarker(Marker marker, @SuppressWarnings("unused") IdentifiableEntity<?> removeEntity,
271
            IdentifiableEntity<?> stayEntity) {
272
        if (isNoLastActionMarker(marker)){
273
            for (Annotation annotation : stayEntity.getAnnotations()){
274
                if (isLastActionDateAnnotation(annotation)){
275
                        return true;
276
                }
277
            }
278
        }
279
        return false;
280
    }
281

  
282
    private boolean isLastActionDateAnnotation(Annotation annotation) {
283
        return annotation.getAnnotationType().getUuid().equals(DbLastActionMapper.uuidAnnotationTypeLastActionDate)
284
            && !isBlank(annotation.getText());
285
    }
286

  
287
    private void handleRemoveAnnotation(Annotation annotation,
288
            @SuppressWarnings("unused") IdentifiableEntity<?> removeEntity,
289
            IdentifiableEntity<?> stayEntity) {
290
        if (isLastActionDateAnnotation(annotation)){
291
            Optional<Marker> noLastActionMarker = stayEntity.getMarkers().stream().filter(m->isNoLastActionMarker(m)).findFirst();
292
            if (noLastActionMarker.isPresent()){
293
                stayEntity.removeMarker(noLastActionMarker.get());
294
                System.out.println("  NoLastActionDate annotation removed from 'stay' " + stayEntity.getClass().getSimpleName());
295
            }
296
        };
297
    }
298

  
299
    private boolean isNoLastActionMarker(Marker marker) {
300
        return marker.getMarkerType().getUuid().equals(PesiTransformer.uuidMarkerTypeHasNoLastAction)
301
                && marker.getValue() == true;
302
    }
303

  
304
    @SuppressWarnings("unused")
305
    private boolean filterAnnotation(Annotation annotation, IdentifiableEntity<?> removeEntity, IdentifiableEntity<?> stayEntity) {
306
        return false;
307
    }
308

  
309
    private Taxon[] readTaxa() {
310

  
311
        try {
312
            Taxon taxon1 = readTaxon("Taxon to be removed");
313
            Taxon taxon2 = readTaxon("Taxon to stay");
314
            return new Taxon[]{taxon1, taxon2};
315
        } catch (Exception e) {
316
            System.out.println("Reading taxon not successful");
317
            return null;
318
        }
319
    }
320

  
321
    private Taxon readTaxon(String message) {
322
        String strTaxon = CdmUtils.readInputLine(message + ": ");
323
        TaxonBase<?> taxon;
324
        if (strTaxon.matches("\\d{1,10}")){
325
            taxon = app.getTaxonService().find(Integer.valueOf(strTaxon));
326
        }else if (strTaxon.matches(CdmRegEx.UUID_RE)){
327
            taxon = app.getTaxonService().find(UUID.fromString(strTaxon));
328
        }else{
329
            throw new IllegalArgumentException("Input not recognized as id or uuid.");
330
        }
331
        if (taxon == null){
332
            throw new IllegalArgumentException("Input was not a valid taxon id.");
333
        }else if (taxon.isInstanceOf(Synonym.class)){
334
            throw new IllegalArgumentException("Input was synonym but accepted taxon required.");
335
        }else{
336
            return CdmBase.deproxy(taxon, Taxon.class);
337
        }
338
    }
339

  
340

  
341

  
342
    public static void main(String[] args) {
343
        PesiCommandLineMerge merger = new PesiCommandLineMerge();
344
        merger.invoke(pesiSource);
345
        System.exit(0);
346
    }
347
}
cdm-pesi/src/main/java/eu/etaxonomy/cdm/app/pesi/merging/PesiMergeBase.java
1
/**
2
* Copyright (C) 2020 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.app.pesi.merging;
10

  
11
import java.io.FileReader;
12
import java.io.IOException;
13
import java.util.ArrayList;
14
import java.util.Arrays;
15
import java.util.List;
16

  
17
import org.apache.commons.lang3.StringUtils;
18

  
19
import au.com.bytecode.opencsv.CSVReader;
20

  
21
/**
22
 * Base class for PESI merge classes.
23
 *
24
 * @author a.mueller
25
 * @since 20.01.2020
26
 */
27
public abstract class PesiMergeBase {
28

  
29
    protected static List<List<String>> readCsvFile(String fileName){
30
        List<List<String>> result = new ArrayList<>();
31
        try {
32
            CSVReader reader = new CSVReader(new FileReader(fileName));
33
            String[] row;
34
            while ((row = reader.readNext()) != null){
35
                result.add(Arrays.asList(row));
36
            }
37
            reader.close();
38
        } catch (IOException e1) {
39
            e1.printStackTrace();
40
        }
41
        return result;
42
    }
43

  
44
    protected boolean isBlank(String str) {
45
        return StringUtils.isBlank(str);
46
    }
47
}

Also available in: Unified diff