Revision 0f8489d7
Added by Andreas Müller almost 10 years ago
app-import/src/main/java/eu/etaxonomy/cdm/app/proibiosphere/TaxonXImportLauncher.java | ||
---|---|---|
51 | 51 |
// static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_local_mysql(); |
52 | 52 |
static final ICdmDataSource cdmDestination = CdmDestinations.localH2(); |
53 | 53 |
// static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_local_mysql_test(); |
54 |
// static final ICdmDataSource cdmDestination = CdmDestinations.cdm_production_piB("piB_spiders"); |
|
54 | 55 |
|
55 | 56 |
static final CHECK check = CHECK.IMPORT_WITHOUT_CHECK; |
56 | 57 |
|
... | ... | |
79 | 80 |
|
80 | 81 |
String defaultClassification="Spiders"; |
81 | 82 |
boolean alwaysUseDefaultClassification = true; |
83 |
|
|
84 |
boolean useOldUnparsedSynonymExtraction = false; |
|
82 | 85 |
|
83 | 86 |
|
84 | 87 |
|
... | ... | |
95 | 98 |
loadTreatmentIfPresent(filterType,taxonList, documentMap); |
96 | 99 |
// loadTreatmentIfPresent(FilterType.MODS,modsList, documents,documentMap); |
97 | 100 |
|
98 |
TaxonXImportConfigurator taxonxImportConfigurator =null; |
|
99 | 101 |
CdmDefaultImport<TaxonXImportConfigurator> taxonImport = new CdmDefaultImport<TaxonXImportConfigurator>(); |
100 | 102 |
|
101 | 103 |
ICdmDataSource destination = cdmDestination; |
102 |
taxonxImportConfigurator = prepareTaxonXImport(destination,reuseSecundum, secundum, tnomenclature, alwaysUseDefaultClassification); |
|
103 |
|
|
104 |
taxonxImportConfigurator.setImportClassificationName(defaultClassification); |
|
104 |
TaxonXImportConfigurator config = prepareTaxonXImport(destination,reuseSecundum, secundum, tnomenclature, alwaysUseDefaultClassification); |
|
105 |
config.setUseOldUnparsedSynonymExtraction(useOldUnparsedSynonymExtraction); |
|
106 |
|
|
107 |
config.setImportClassificationName(defaultClassification); |
|
105 | 108 |
log.info("Start import from TaxonX Data"); |
106 | 109 |
|
107 |
taxonxImportConfigurator.setLastImport(false);
|
|
110 |
config.setLastImport(false);
|
|
108 | 111 |
|
109 | 112 |
int j=0; |
110 | 113 |
for (String document : documentMap.keySet()){ |
... | ... | |
115 | 118 |
System.out.println("START "+document+" "+i+" ("+(documentMap.get(document)).size()+"): "+source.getPath()); |
116 | 119 |
i++; |
117 | 120 |
if (j==documentMap.keySet().size() && i==documentMap.get(document).size()) { |
118 |
taxonxImportConfigurator.setLastImport(true);
|
|
121 |
config.setLastImport(true);
|
|
119 | 122 |
} |
120 |
prepareReferenceAndSource(taxonxImportConfigurator,source);
|
|
123 |
prepareReferenceAndSource(config,source);
|
|
121 | 124 |
// taxonxImportConfigurator.setTaxonReference(null); |
122 |
taxonImport.invoke(taxonxImportConfigurator);
|
|
125 |
taxonImport.invoke(config);
|
|
123 | 126 |
log.info("End import from SpecimenData ("+ source.toString() + ")..."); |
124 | 127 |
|
125 | 128 |
// //deduplicate |
... | ... | |
221 | 224 |
e1.printStackTrace(); |
222 | 225 |
} |
223 | 226 |
|
224 |
// sourcesStr.add("/home/pkelbert/Documents/Proibiosphere/ChenopodiumXML/1362148061170_Chenopodium_K_hn_U_1993_tx.xml"); |
|
225 |
|
|
226 | 227 |
//System.out.println(documents); |
227 | 228 |
for (String docId : docs.keySet()){ |
228 | 229 |
List<String> treatments = new ArrayList<String>(new HashSet<String>(docs.get(docId))); |
... | ... | |
243 | 244 |
// log.info(pages); |
244 | 245 |
|
245 | 246 |
log.info("Document "+docId+" should have "+treatments.size()+" treatments"); |
246 |
//don't test if all the treatments are really online, it should be working without problems now |
|
247 |
// int cnt=0; |
|
248 |
// if(treatments.size()<150){ |
|
249 |
// |
|
250 |
// for (String source:treatments){ |
|
251 |
// DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); |
|
252 |
// DocumentBuilder builder; |
|
253 |
// URL url; |
|
254 |
// |
|
255 |
// try { |
|
256 |
// builder = factory.newDocumentBuilder(); |
|
257 |
// url = new URL(source.split("---")[3]); |
|
258 |
// Object o = url.getContent(); |
|
259 |
// InputStream is = (InputStream) o; |
|
260 |
// Document document = builder.parse(is); |
|
261 |
// cnt++; |
|
262 |
// }catch(Exception e){ |
|
263 |
// // e.printStackTrace(); |
|
264 |
// log.warn(e); |
|
265 |
// } |
|
266 |
// } |
|
267 |
// log.info("Document "+docId+" has "+cnt+" treatments available"); |
|
268 |
// } |
|
269 |
// if(treatments.size() != cnt) |
|
270 |
// { |
|
271 |
// File file = new File("/home/pkelbert/Bureau/urlTaxonXToDoLater.txt"); |
|
272 |
// FileWriter writer; |
|
273 |
// try { |
|
274 |
// writer = new FileWriter(file ,true); |
|
275 |
// writer.write(docId+"\n"); |
|
276 |
// writer.flush(); |
|
277 |
// writer.close(); |
|
278 |
// } catch (IOException e1) { |
|
279 |
// // TODO Auto-generated catch block |
|
280 |
// e1.printStackTrace(); |
|
281 |
// } |
|
282 |
// |
|
283 |
// } |
|
284 |
// else{ |
|
285 | 247 |
List<URI> uritmp = documentMap.get(docId); |
286 | 248 |
if (uritmp == null) { |
287 | 249 |
uritmp = new ArrayList<URI>(); |
... | ... | |
305 | 267 |
|
306 | 268 |
|
307 | 269 |
|
308 |
// } |
|
309 |
////// log.info("NB SOURCES : "+sourcesStr.size()); |
|
310 |
// List<URI> sourcesStr = new ArrayList<URI>(); |
|
311 |
// try { |
|
312 |
//// documentMap = new HashMap<String, List<URI>>(); |
|
313 |
// sourcesStr.add(new URI("http://plazi.cs.umb.edu/GgServer/cdmSync/8F5B3EA099D371BC41CC5DDBFEDCFBED")); |
|
314 |
// documentMap.put("singlesource", sourcesStr); |
|
315 |
// } catch (URISyntaxException e) { |
|
316 |
// // TODO Auto-generated catch block |
|
317 |
// e.printStackTrace(); |
|
318 |
// } |
|
270 |
|
|
319 | 271 |
|
320 | 272 |
return documentMap; |
321 | 273 |
|
Also available in: Unified diff
removed some destinations