fix #6095 log duplicate registrations during import
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / app / cuba / CubaActivator.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.app.cuba;
11
12 import java.net.URI;
13 import java.util.UUID;
14
15 import org.apache.log4j.Logger;
16
17 import eu.etaxonomy.cdm.api.application.CdmApplicationController;
18 import eu.etaxonomy.cdm.api.application.ICdmApplicationConfiguration;
19 import eu.etaxonomy.cdm.api.service.ITermService;
20 import eu.etaxonomy.cdm.app.common.CdmDestinations;
21 import eu.etaxonomy.cdm.database.DbSchemaValidation;
22 import eu.etaxonomy.cdm.database.ICdmDataSource;
23 import eu.etaxonomy.cdm.io.common.CdmDefaultImport;
24 import eu.etaxonomy.cdm.io.common.IImportConfigurator.CHECK;
25 import eu.etaxonomy.cdm.io.cuba.CubaImportConfigurator;
26 import eu.etaxonomy.cdm.io.cuba.CubaTransformer;
27 import eu.etaxonomy.cdm.model.agent.Person;
28 import eu.etaxonomy.cdm.model.description.Feature;
29 import eu.etaxonomy.cdm.model.description.FeatureNode;
30 import eu.etaxonomy.cdm.model.description.FeatureTree;
31 import eu.etaxonomy.cdm.model.reference.Reference;
32 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
33
34 /**
35 * @author a.mueller
36 * @created 04.01.2016
37 */
38 public class CubaActivator {
39 private static final Logger logger = Logger.getLogger(CubaActivator.class);
40
41 //database validation status (create, update, validate ...)
42 static DbSchemaValidation hbm2dll = DbSchemaValidation.VALIDATE;
43
44 static final ICdmDataSource cdmDestination = CdmDestinations.localH2();
45 // static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_local_mysql_test();
46 // static final ICdmDataSource cdmDestination = CdmDestinations.cdm_cuba_production();
47
48 static boolean invers = true;
49
50 boolean doAsteraceae = include;
51 boolean doConvolvulaceae = include;
52 boolean doCyperaceae = include;
53 boolean doDicotA_C = include;
54 boolean doDicotD_M = include;
55 boolean doDicotN_Z = include;
56 boolean doEuphorbiaceae = include;
57 boolean doFabaceae = include;
58 boolean doGymnospermae = include;
59 boolean doLamVerbenaceae = include;
60 boolean doMalpighiaceae = include;
61 boolean doMelastomataceae = include;
62 boolean doMonocots = include;
63 boolean doMyrtaceae = include;
64 boolean doOrchidaceae = include;
65 boolean doRubiaceae = include;
66 boolean doUrticaceae = include;
67
68 static boolean include = !invers;
69
70
71 //feature tree uuid
72 public static final UUID featureTreeUuid = UUID.fromString("dad6b9b5-693f-4367-a7aa-076cc9c99476");
73
74 //classification
75 static final UUID classificationUuid = UUID.fromString("5de394de-9c76-4b97-b04d-71be31c7f44b");
76 private static final String classificationName = "Cuba Checklist";
77
78 static final String sourceReferenceTitle = "Cuba Checklist Word Documents";
79
80 //check - import
81 static final CHECK check = CHECK.IMPORT_WITHOUT_CHECK;
82
83 boolean doVocabularies = (hbm2dll == DbSchemaValidation.CREATE);
84 static final boolean doTaxa = false;
85 static final boolean doDeduplicate = true;
86
87
88 private void doImport(ICdmDataSource cdmDestination){
89
90 URI source = monocots(); //just any
91
92 //make Source
93 CubaImportConfigurator config= CubaImportConfigurator.NewInstance(source, cdmDestination);
94 config.setClassificationUuid(classificationUuid);
95 config.setClassificationName(classificationName);
96 config.setCheck(check);
97 // config.setDoDistribution(doDistribution);
98 config.setDoTaxa(doTaxa);
99 config.setDbSchemaValidation(hbm2dll);
100 config.setSourceReferenceTitle(sourceReferenceTitle);
101 config.setDoVocabularies(doVocabularies);
102
103 CdmDefaultImport<CubaImportConfigurator> myImport = new CdmDefaultImport<CubaImportConfigurator>();
104
105
106 //...
107 if (doAsteraceae){
108 doSingleSource(asteraceae(), config, myImport, doVocabularies);
109 }
110 if (doConvolvulaceae){
111 doSingleSource(convolvulaceae(), config, myImport, doVocabularies);
112 }
113 if (doCyperaceae){
114 doSingleSource(cyperaceae(), config, myImport, doVocabularies);
115 }
116 if (doDicotA_C){
117 doSingleSource(dicotA_C(), config, myImport, doVocabularies);
118 }
119 if (doDicotD_M){
120 doSingleSource(dicotD_M(), config, myImport, doVocabularies);
121 }
122 if (doDicotN_Z){
123 doSingleSource(dicotN_Z(), config, myImport, doVocabularies);
124 }
125 if (doEuphorbiaceae){
126 doSingleSource(euphorbiaceae(), config, myImport, doVocabularies);
127 }
128 if (doFabaceae){
129 doSingleSource(fabaceae(), config, myImport, doVocabularies);
130 }
131 if (doGymnospermae){
132 doSingleSource(gymnospermae(), config, myImport, doVocabularies);
133 }
134 if (doLamVerbenaceae){
135 doSingleSource(lamVerbenaceae(), config, myImport, doVocabularies);
136 }
137 if (doMalpighiaceae){
138 doSingleSource(malpighiaceae(), config, myImport, doVocabularies);
139 }
140 if (doMelastomataceae){
141 doSingleSource(melastomataceae(), config, myImport, doVocabularies);
142 }
143 if (doMonocots){
144 doSingleSource(monocots(), config, myImport, doVocabularies);
145 }
146 if (doMyrtaceae){
147 doSingleSource(myrtaceae(), config, myImport, doVocabularies);
148 }
149 if (doOrchidaceae){
150 doSingleSource(orchidaceae(), config, myImport, doVocabularies);
151 }
152 if (doRubiaceae){
153 doSingleSource(rubiaceae(), config, myImport, doVocabularies);
154 }
155 if (doUrticaceae){
156 doSingleSource(urticaceae(), config, myImport, doVocabularies);
157 }
158
159
160 //deduplicate
161 if (doDeduplicate){
162 logger.warn("Start deduplication ...");
163
164 ICdmApplicationConfiguration app = myImport.getCdmAppController();
165 if (app == null){
166 app = CdmApplicationController.NewInstance(cdmDestination, hbm2dll, false);
167 }
168 int count = app.getAgentService().deduplicate(Person.class, null, null);
169 logger.warn("Deduplicated " + count + " persons.");
170 // count = app.getAgentService().deduplicate(Team.class, null, null);
171 // logger.warn("Deduplicated " + count + " teams.");
172 // count = app.getReferenceService().deduplicate(Reference.class, null, null);
173 // logger.warn("Deduplicated " + count + " references.");
174 }
175
176 System.exit(0);
177
178 }
179
180 /**
181 * @param source
182 * @param config
183 * @param myImport
184 */
185 private void doSingleSource(URI source, CubaImportConfigurator config,
186 CdmDefaultImport<CubaImportConfigurator> myImport, boolean doVocabularies) {
187 config.setSource(source);
188 String fileName = source.toString();
189 fileName = fileName.substring(fileName.lastIndexOf("/") + 1 );
190
191 String message = "Start import from ("+ fileName + ") ...";
192 System.out.println(message);
193 logger.warn(message);
194 config.setSourceReference(getSourceReference(fileName));
195 config.setDoVocabularies(doVocabularies);
196 myImport.invoke(config);
197
198 if (doVocabularies){
199 FeatureTree tree = makeFeatureNodes(myImport.getCdmAppController().getTermService());
200 myImport.getCdmAppController().getFeatureTreeService().saveOrUpdate(tree);
201 this.doVocabularies = false;
202 }
203 System.out.println("End import from ("+ source.toString() + ")...");
204 }
205
206 private final Reference inRef = ReferenceFactory.newGeneric();
207 private Reference getSourceReference(String string) {
208 Reference result = ReferenceFactory.newGeneric();
209 result.setTitleCache(string, true);
210 result.setInReference(inRef);
211 inRef.setTitleCache(sourceReferenceTitle, true);
212 return result;
213 }
214
215 private FeatureTree makeFeatureNodes(ITermService service){
216 // CyprusTransformer transformer = new CyprusTransformer();
217
218 FeatureTree result = FeatureTree.NewInstance(featureTreeUuid);
219 result.setTitleCache("Cuba Feature Tree", true);
220 FeatureNode root = result.getRoot();
221 FeatureNode newNode;
222
223 newNode = FeatureNode.NewInstance(Feature.DISTRIBUTION());
224 root.addChild(newNode);
225
226 // Feature featurAltFam = (Feature)service.find(CubaTransformer.uuidAlternativeFamily);
227 // newNode = FeatureNode.NewInstance(featurAltFam);
228 // root.addChild(newNode);
229
230 Feature featurAltFam2 = (Feature)service.find(CubaTransformer.uuidAlternativeFamily2);
231 newNode = FeatureNode.NewInstance(featurAltFam2);
232 root.addChild(newNode);
233
234 return result;
235 }
236
237
238 //Monocots
239 public static URI monocots() {
240 return URI.create("file:////BGBM-PESIHPC/Cuba/Monocot.xlsx");
241 }
242 //Cyperaceae
243 public static URI cyperaceae() {
244 return URI.create("file:////BGBM-PESIHPC/Cuba/Cyper_Poaceae.xlsx");
245 }
246 //Fabaceae
247 public static URI fabaceae() {
248 return URI.create("file:////BGBM-PESIHPC/Cuba/Fabaceae.xlsx");
249 }
250 //Urticaceae
251 public static URI urticaceae() {
252 return URI.create("file:////BGBM-PESIHPC/Cuba/Urticaceae.xlsx");
253 }
254 //Asteraceae
255 public static URI asteraceae() {
256 return URI.create("file:////BGBM-PESIHPC/Cuba/Asteraceae.xlsx");
257 }
258 //Convolvulaceae
259 public static URI convolvulaceae() {
260 return URI.create("file:////BGBM-PESIHPC/Cuba/Convolvulaceae.xlsx");
261 }
262 //dicot A-C
263 public static URI dicotA_C() {
264 return URI.create("file:////BGBM-PESIHPC/Cuba/dicotA_C.xlsx");
265 }
266 //dicot D-M
267 public static URI dicotD_M() {
268 return URI.create("file:////BGBM-PESIHPC/Cuba/dicotD_M.xlsx");
269 }
270 //dicot N-Z
271 public static URI dicotN_Z() {
272 return URI.create("file:////BGBM-PESIHPC/Cuba/dicotN_Z.xlsx");
273 }
274 //Euphorbiaceae
275 public static URI euphorbiaceae() {
276 return URI.create("file:////BGBM-PESIHPC/Cuba/Euphorbiaceae.xlsx");
277 }
278 //Gymnospermae
279 public static URI gymnospermae() {
280 return URI.create("file:////BGBM-PESIHPC/Cuba/gymnospermae.xlsx");
281 }
282 //Lam.Verbenaceae
283 public static URI lamVerbenaceae() {
284 return URI.create("file:////BGBM-PESIHPC/Cuba/Lam_Verbenaceae.xlsx");
285 }
286 //Malpighiaceae
287 public static URI malpighiaceae() {
288 return URI.create("file:////BGBM-PESIHPC/Cuba/Malpighiaceae.xlsx");
289 }
290 //Melastomataceae
291 public static URI melastomataceae() {
292 return URI.create("file:////BGBM-PESIHPC/Cuba/Melastomataceae.xlsx");
293 }
294 //Myrtaceae
295 public static URI myrtaceae() {
296 return URI.create("file:////BGBM-PESIHPC/Cuba/Myrtaceae.xlsx");
297 }
298 //Orchidaceae
299 public static URI orchidaceae() {
300 return URI.create("file:////BGBM-PESIHPC/Cuba/Orchidaceae.xlsx");
301 }
302 //Rubiaceae
303 public static URI rubiaceae() {
304 return URI.create("file:////BGBM-PESIHPC/Cuba/Rubiaceae.xlsx");
305 }
306
307 /**
308 * @param args
309 */
310 public static void main(String[] args) {
311 CubaActivator me = new CubaActivator();
312 me.doImport(cdmDestination);
313 }
314
315 }