Now iterates through all dwca.zip files downloaded using Scratchpads service and...
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / app / vibrant / DwcaScratchpadImportActivator.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.app.vibrant;
11
12 import java.io.File;
13 import java.net.URI;
14 import java.util.UUID;
15
16 import org.apache.log4j.Logger;
17
18 import eu.etaxonomy.cdm.app.common.CdmDestinations;
19 import eu.etaxonomy.cdm.database.DbSchemaValidation;
20 import eu.etaxonomy.cdm.database.ICdmDataSource;
21 import eu.etaxonomy.cdm.io.common.CdmDefaultImport;
22 import eu.etaxonomy.cdm.io.common.IImportConfigurator.CHECK;
23 import eu.etaxonomy.cdm.io.common.events.LoggingIoObserver;
24 import eu.etaxonomy.cdm.io.dwca.in.DwcaDataImportConfiguratorBase.DatasetUse;
25 import eu.etaxonomy.cdm.io.dwca.in.DwcaImportConfigurator;
26 import eu.etaxonomy.cdm.io.dwca.in.IImportMapping.MappingType;
27 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
28 import eu.etaxonomy.cdm.model.reference.Reference;
29 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
30
31 /**
32 * @author a.mueller
33 * @created 03.04.2012
34 * @version 1.0
35 */
36 public class DwcaScratchpadImportActivator {
37 @SuppressWarnings("unused")
38 private static final Logger logger = Logger.getLogger(DwcaScratchpadImportActivator.class);
39
40 //database validation status (create, update, validate ...)
41 static DbSchemaValidation hbm2dll = DbSchemaValidation.UPDATE;//UPDATE;//CREATE;//UPDATE;
42
43 // static final URI source = dwca_emonocots_dioscoreaceae();
44 // static final URI source = dwca_emonocots_zingiberaceae();
45 //static final URI source = dwca_emonocots_cypripedioideae();
46 static final URI source = null;//dwca_antkey();//dwca_emonocots_dioscoreaceae();
47
48
49 // static final ICdmDataSource cdmDestination = CdmDestinations.localH2();
50 static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_local_mysql();
51 // static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_local_mysql_test();
52
53 //default nom code is ICZN as it allows adding publication year
54 static final NomenclaturalCode defaultNomCode = NomenclaturalCode.ICBN;
55
56 //classification Name
57 static String classificationName = "Default classification";
58
59 //title
60 static final String title = "Scratchpad test import";
61
62 //check - import
63 static final CHECK check = CHECK.IMPORT_WITHOUT_CHECK;
64 static int partitionSize = 1000;
65
66 //config
67 static DatasetUse datasetUse = DatasetUse.CLASSIFICATION;
68
69 //validate
70 static boolean validateRankConsistency = false;
71
72
73 //taxa
74 static final boolean doTaxa = true;
75 static final boolean doDistribution = true;
76
77
78
79 static final MappingType mappingType = MappingType.InMemoryMapping;
80
81 //classification
82 static final UUID classificationUuid = UUID.fromString("d9d199b6-eaf4-47c8-a732-0639bc445c56");
83
84
85 //config
86 static boolean scientificNameIdAsOriginalSourceId = true;
87 static boolean guessNomRef = false;
88 private boolean handleAllRefsAsCitation = false;
89 private static final boolean useSourceReferenceAsSec = true;
90
91
92 //deduplicate
93 static final boolean doDeduplicate = false;
94
95
96
97 protected void doImport(URI source, ICdmDataSource cdmDestination, UUID classificationUuid, String title, DbSchemaValidation hbm2dll){
98
99 //make Source
100 DwcaImportConfigurator config= DwcaImportConfigurator.NewInstance(source, cdmDestination);
101 config.addObserver(new LoggingIoObserver());
102 config.setClassificationUuid(classificationUuid);
103 config.setCheck(check);
104 config.setDbSchemaValidation(hbm2dll);
105 config.setMappingType(mappingType);
106
107 config.setScientificNameIdAsOriginalSourceId(scientificNameIdAsOriginalSourceId);
108 config.setValidateRankConsistency(validateRankConsistency);
109 config.setDefaultPartitionSize(partitionSize);
110 config.setNomenclaturalCode(defaultNomCode);
111 config.setDatasetUse(datasetUse);
112 config.setGuessNomenclaturalReferences(guessNomRef);
113 config.setHandleAllRefsAsCitation(handleAllRefsAsCitation);
114 config.setUseSourceReferenceAsSec(useSourceReferenceAsSec);
115 config.setSourceReferenceTitle(classificationName);//title);
116 config.setClassificationName(classificationName);
117
118 config.setUseSourceReferenceAsSec(true);//Lorna: what shall we use as sec reference for Scratchpads data?
119
120 CdmDefaultImport myImport = new CdmDefaultImport();
121
122
123 //...
124 if (true){
125 System.out.println("Start import from ("+ source.toString() + ") ...");
126 config.setSourceReference(getSourceReference(config.getSourceReferenceTitle()));
127 myImport.invoke(config);
128 System.out.println("End import from ("+ source.toString() + ")...");
129 }
130
131
132 }
133
134 private Reference<?> getSourceReference(String string) {
135 Reference<?> result = ReferenceFactory.newGeneric();
136 result.setTitleCache(string);
137 return result;
138 }
139
140 //Dwca
141 public static URI dwca_emonocots_local() {
142 URI sourceUrl = URI.create("file:///C:/localCopy/Data/dwca/import/Scratchpads/dwca_dioscoreaceae_emonocots.zip");
143 return sourceUrl;
144 }
145
146 //emonocots_dioscoreaceae
147 public static URI dwca_emonocots_dioscoreaceae() {
148 //URI sourceUrl = URI.create("file:////PESIIMPORT3/vibrant/dwca/dwca_emonocots_dioscoreaceae.zip");//dwca_dioscoreaceae_e_monocot.zip
149 URI sourceUrl = URI.create("file:///C:/Users/l.morris/Downloads/dwca_scratchpads/dwca_dioscoreaceae_e_monocot_org.zip");
150 return sourceUrl;
151 }
152
153 //dwca_antkey_org.zip
154 public static URI dwca_antkey() {
155 //URI sourceUrl = URI.create("file:////PESIIMPORT3/vibrant/dwca/dwca_emonocots_dioscoreaceae.zip");//dwca_dioscoreaceae_e_monocot.zip
156 URI sourceUrl = URI.create("file:///C:/Users/l.morris/Downloads/amaryllidaceae.zip");//antkey.zip");//alismataceae.zip");//hypoxidaceae.zip");//dwca_antkey.zip");
157 return sourceUrl;
158 }
159
160 //emonocots_zingiberaceae
161 public static URI dwca_emonocots_zingiberaceae() {
162 URI sourceUrl = URI.create("file:////PESIIMPORT3/vibrant/dwca/dwca_emonocots_zingiberaceae.zip");
163 return sourceUrl;
164 }
165 //emonocots_cypripedioideae
166 public static URI dwca_emonocots_cypripedioideae() {
167 //URI sourceUrl = URI.create("file:////PESIIMPORT3/vibrant/dwca/dwca_emonocots_cypripedioideae.zip");
168 URI sourceUrl = URI.create("file:///C:/Users/l.morris/Downloads/dwca_scratchpads/dwca_cypripedioideae_e-monocot_org.zip");
169 return sourceUrl;
170 }
171
172
173 /**
174 * @param args
175 */
176 public static void main(String[] args) {
177 DwcaScratchpadImportActivator me = new DwcaScratchpadImportActivator();
178
179 //lorna: TODO get the classification name from the dwca zip file name
180 classificationName = "Scratchpad classification";//"Amaryllidaceae";
181
182 // Directory path here
183 String path = "C:/Users/l.morris/Downloads/dwca_scratchpads/nine";
184 URI sourceUrl;
185 //URI sourceUrl = URI.create("file:////PESIIMPORT3/vibrant/dwca/dwca_emonocots_zingiberaceae.zip");
186
187 String zipFile;
188 File folder = new File(path);
189 File[] listOfFiles = folder.listFiles();
190
191 for (int i = 0; i < listOfFiles.length; i++)
192 {
193
194 if (listOfFiles[i].isFile())
195 {
196 zipFile = listOfFiles[i].getName();
197
198 if (zipFile.endsWith(".zip"))
199 {
200 //classificationName = zipFile.split(".zip")[0];
201 classificationName = zipFile.split("dwca_")[1];
202 classificationName = classificationName.split("_")[0];
203 System.out.println(classificationName);
204 //start the Scratchpad name with uppercase.
205 char[] stringArray = classificationName.toCharArray();
206 stringArray[0] = Character.toUpperCase(stringArray[0]);
207 classificationName = new String(stringArray) + " (Scratchpads)";
208 System.out.println(classificationName);
209 //System.exit(999);
210
211 sourceUrl = URI.create("file:///" + path + "/" + zipFile);
212 System.out.println(sourceUrl);
213 me.doImport(sourceUrl, cdmDestination, classificationUuid, title, hbm2dll);
214 //System.exit(999);
215 }
216 }
217 }
218
219 //System.exit(999);
220 //list all files in the directory
221 //get the URI of each
222 //URI sourceUrl = URI.create("file:///C:/Users/l.morris/Downloads/amaryllidaceae.zip");
223
224 //Lorna iterate through the dwca directly getting each dwca.zip and generate a URI for each source
225 //me.doImport(source, cdmDestination, classificationUuid, title, hbm2dll);
226 }
227
228 }