//database validation status (create, update, validate ...)
static DbSchemaValidation hbm2dll = DbSchemaValidation.CREATE;
+// static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_local_mysql();
static final ICdmDataSource cdmDestination = CdmDestinations.localH2();
-
+// static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_local_mysql_test();
+
static final CHECK check = CHECK.IMPORT_WITHOUT_CHECK;
+
+ private enum FilterType{MODS, TAXON};
static String plaziUrl = "http://plazi.cs.umb.edu/GgServer/search?taxonomicName.isNomenclature=true&taxonomicName.exactMatch=true&indexName=0&subIndexName=taxonomicName&subIndexName=MODS&minSubResultSize=1&searchMode=index&resultFormat=xml&xsltUrl=http%3A%2F%2Fplazi.cs.umb.edu%2FGgServer%2Fresources%2FsrsWebPortalData%2FCdmSyncTreatmentList.xslt&taxonomicName.taxonomicName=";
static String plaziUrlDoc = "http://plazi.cs.umb.edu/GgServer/search?taxonomicName.isNomenclature=true&taxonomicName.exactMatch=true&indexName=0&subIndexName=taxonomicName&subIndexName=MODS&minSubResultSize=1&searchMode=index&resultFormat=xml&xsltUrl=http%3A%2F%2Fplazi.cs.umb.edu%2FGgServer%2Fresources%2FsrsWebPortalData%2FCdmSyncTreatmentList.xslt&MODS.ModsDocID=";
- private static String askQuestion(String question){
- Scanner scan = new Scanner(System.in);
- System.out.println(question);
- String index = scan.nextLine();
- return index;
- }
public static void main(String[] args) {
-// String[] taxonList = new String[] {"Eupolybothrus","Polybothrus"};
- /*ants*/ String[] modsList = new String[] {"3924", "3743", "4375", "6757", "6752", "3481", "21401_fisher_smith_plos_2008", "2592", "4096", "6877", "6192", "8071"};
+ String[] spiderModsList = new String[] {"zt03768p138","zt03750p196","zt03666p193","zt03664p068","zt03646p592","zt03507p056","zt03415p057","zt03383p038","zt03305p052","zt03228p068","zt03131p034","zt02963p068","zt02883p068","zt02814p018","zt02739p050","zt02730p043","zt02637p054","zt02593p127","zt02551p068","zt02534p036","zt02526p053","zt02427p035","zt02361p012","zt02267p068","zt02223p047","zt01826p058","zt01775p024","zt01744p040","zt01529p060","zt01004p028","zt00904","zt00872","zt00619","zt00109","DippenaarSchoeman1989Penestominae","Simon1902Cribellates","Simon1903Penestominae","Lehtinen1967CribellatePenestominae"};
+
+ String[] taxonList = new String[] {"Comaroma"}; //{"Eupolybothrus","Polybothrus"}, Chenopodium, Lactarius, Campylopus, Nephrolepis, Comaroma (spiders)
+// /*ants Anochetus*/ String[] modsList = new String[] {"3924" /*, "3743", "4375", "6757", "6752", "3481", "21401_fisher_smith_plos_2008", "2592", "4096", "6877", "6192", "8071" */};
// String[] modsList = new String[] {"21367", "21365", "8171", "6877", "21820", "3641", "6757"};
-// debut="3743", "3628", "4022", "3994", "3603", "8070", "4001", "4071", "3948", "3481"};
+// /*auch ants*/ debut="3743", "3628", "4022", "3994", "3603", "8070", "4001", "4071", "3948", "3481"};
// suite: , };//,"3540555099"};
// modsList = new String[] {"Zapparoli-1986-Eupolybothrus-fasciatus"};
- String tnomenclature = "ICZN";
+ taxonList = spiderModsList;
+
+ FilterType filterType = FilterType.MODS;
+
+ NomenclaturalCode tnomenclature = NomenclaturalCode.ICZN;
- String defaultClassif="Ants";
+ String defaultClassification="Spiders";
+ boolean alwaysUseDefaultClassification = true;
- Map<String,List<String>> documents = new HashMap<String,List<String>>();
- HashMap<String,List<URI>>documentMap = new HashMap<String, List<URI>>();
+
+
+
+ Map<String,List<URI>>documentMap = new HashMap<String, List<URI>>();
/*HOW TO HANDLE SECUNDUM REFERENCE*/
boolean reuseSecundum = askIfReuseSecundum();
secundum = askForSecundum();
}
-// checkTreatmentPresence("taxon",taxonList, documents,documentMap);
- checkTreatmentPresence("modsid",modsList, documents,documentMap);
+ loadTreatmentIfPresent(filterType,taxonList, documentMap);
+// loadTreatmentIfPresent(FilterType.MODS,modsList, documents,documentMap);
TaxonXImportConfigurator taxonxImportConfigurator =null;
CdmDefaultImport<TaxonXImportConfigurator> taxonImport = new CdmDefaultImport<TaxonXImportConfigurator>();
ICdmDataSource destination = cdmDestination;
- taxonxImportConfigurator = prepareTaxonXImport(destination,reuseSecundum, secundum);
+ taxonxImportConfigurator = prepareTaxonXImport(destination,reuseSecundum, secundum, tnomenclature, alwaysUseDefaultClassification);
- taxonxImportConfigurator.setImportClassificationName(defaultClassif);
+ taxonxImportConfigurator.setImportClassificationName(defaultClassification);
log.info("Start import from TaxonX Data");
taxonxImportConfigurator.setLastImport(false);
int j=0;
- for (String document:documentMap.keySet()){
+ for (String document : documentMap.keySet()){
j++;
if (doImportDocument(document, documentMap.get(document).size())){
int i=0;
- for (URI source:documentMap.get(document)){
+ for (URI source: documentMap.get(document)){
System.out.println("START "+document+" "+i+" ("+(documentMap.get(document)).size()+"): "+source.getPath());
i++;
if (j==documentMap.keySet().size() && i==documentMap.get(document).size()) {
taxonxImportConfigurator.setLastImport(true);
}
- prepareReferenceAndSource(taxonxImportConfigurator,source);
- prepareNomenclature(taxonxImportConfigurator,tnomenclature);
- // taxonxImportConfigurator.setTaxonReference(null);
+ prepareReferenceAndSource(taxonxImportConfigurator,source);
+ // taxonxImportConfigurator.setTaxonReference(null);
taxonImport.invoke(taxonxImportConfigurator);
log.info("End import from SpecimenData ("+ source.toString() + ")...");
}
-
- /**
- * @param taxonxImportConfigurator
- * @param tnomenclature
- */
- private static void prepareNomenclature(TaxonXImportConfigurator taxonxImportConfigurator, String tnomenclature) {
- // String tnomenclature = askQuestion("ICBN or ICZN ?");
- taxonxImportConfigurator.setNomenclaturalCode(NomenclaturalCode.ICNAFP);
- if (tnomenclature.equalsIgnoreCase("ICBN")) {
- taxonxImportConfigurator.setNomenclaturalCode(NomenclaturalCode.ICNAFP);
- // taxonxImportConfigurator.setClassificationName("Chenopodiaceae");
- }
- if(tnomenclature.equalsIgnoreCase("ICZN")){
- taxonxImportConfigurator.setNomenclaturalCode(NomenclaturalCode.ICZN);
- // taxonxImportConfigurator.setClassificationName("Ants");
- }
- if(tnomenclature.equalsIgnoreCase("ICNB")){
- taxonxImportConfigurator.setNomenclaturalCode(NomenclaturalCode.ICNB);
- // taxonxImportConfigurator.setClassificationName("Bacteria");
- }
-
+ private static String askQuestion(String question){ // prints the question to stdout and returns the next line read from stdin
+ Scanner scan = new Scanner(System.in); // never closed in this method; closing a Scanner also closes its underlying stream (here System.in)
+ System.out.println(question);
+ String index = scan.nextLine(); // the whole input line, whatever its content (not necessarily a numeric index)
+ return index;
}
/**
String tref="PLAZI - "+source.getPath().split("/")[source.getPath().split("/").length-1];
reference.setTitleCache(tref,true);
reference.setTitle(tref);
- reference.generateTitle();
-
+
taxonxImportConfigurator.setSourceReference(reference);
TaxonXImportConfigurator.setSourceRef(reference);
* @param destination
* @param reuseSecundum
* @param secundum
+ * @param tnomenclature
+ * @param alwaysUseDefaultClassification
* @return
*/
- private static TaxonXImportConfigurator prepareTaxonXImport(ICdmDataSource destination, boolean reuseSecundum, Reference<?> secundum) {
+ private static TaxonXImportConfigurator prepareTaxonXImport(ICdmDataSource destination, boolean reuseSecundum, Reference<?> secundum, NomenclaturalCode tnomenclature, boolean alwaysUseDefaultClassification) { // builds a configurator for the destination and applies the class-level check/schema settings plus the per-run options passed in
TaxonXImportConfigurator taxonxImportConfigurator = TaxonXImportConfigurator.NewInstance(destination);
- // taxonxImportConfigurator.setClassificationName(taxonxImportConfigurator.getSourceReferenceTitle());
+ //taxonxImportConfigurator.setClassificationName(taxonxImportConfigurator.getSourceReferenceTitle());
taxonxImportConfigurator.setCheck(check);
taxonxImportConfigurator.setDbSchemaValidation(hbm2dll);
taxonxImportConfigurator.setDoAutomaticParsing(true);
taxonxImportConfigurator.setInteractWithUser(true);
+ taxonxImportConfigurator.setNomenclaturalCode(tnomenclature); // set once here from the caller-supplied code; replaces the removed per-source, string-based prepareNomenclature step
+ taxonxImportConfigurator.setAlwaysUseDefaultClassification(alwaysUseDefaultClassification); // passed through unchanged; the flag's effect is defined by the configurator, not visible here
taxonxImportConfigurator.setKeepOriginalSecundum(reuseSecundum);
if (!reuseSecundum) {
taxonxImportConfigurator.setSecundum(secundum);
}
- // taxonxImportConfigurator.setDoMatchTaxa(true);
- // taxonxImportConfigurator.setReUseTaxon(true);
+ //taxonxImportConfigurator.setDoMatchTaxa(true);
+ // taxonxImportConfigurator.setReUseTaxon(true);
return taxonxImportConfigurator;
}
/**
- * @param importFilter
+ * @param filterType
* @param modsList
* @param documents
* @param documentMap
* @return
*/
- private static HashMap<String, List<URI>> checkTreatmentPresence(String importFilter, String[] modsList, Map<String, List<String>> documents, HashMap<String, List<URI>> documentMap) {
- URL plaziURL;
- // System.out.println(plaziUrl);
+ private static Map<String, List<URI>> loadTreatmentIfPresent(FilterType filterType, String[] filterList, Map<String, List<URI>> documentMap) {
- Map<String, List<String>> docs = new HashMap<String, List<String>>();
+ Map<String, List<String>> docs = new HashMap<String, List<String>>();
try {
- BufferedReader in=null;
List<String> docList;
String inputLine;
- String docID;
- String pageStart;
- String pageEnd;
- String taxon;
- String link;
String urlstr="";
- for(String modsID : modsList){
- // plaziUrl=plaziUrl+"Eupolybothrus";
- if (importFilter.equalsIgnoreCase("modsid")) {
- urlstr=plaziUrlDoc+modsID;
- }
- if (importFilter.equalsIgnoreCase("taxon")) {
- urlstr=plaziUrl+modsID;
- }
-// System.out.println(url);
-
- plaziURL = new URL(urlstr);
- in = new BufferedReader(new InputStreamReader(plaziURL.openStream()));
-
-
- //TODO lastUpdate field
- // if(!plaziNotServer){
- while ((inputLine = in.readLine()) != null) {
- System.out.println(inputLine);
- if (inputLine.startsWith("<treatment ")){
- taxon = inputLine.split("taxon=\"")[1].split("\"")[0];
- docID=inputLine.split("docId=\"")[1].split("\"")[0];
- System.out.println("docID: "+docID);
- link=inputLine.split("link=\"")[1].split("\"")[0];
- pageStart = inputLine.split("startPage=\"")[1].split("\"")[0];
- pageEnd = inputLine.split("endPage=\"")[1].split("\"")[0];
- docList = documents.get(docID);
- if (docList == null) {
- docList = new ArrayList<String>();
- }
- docList.add(pageStart+"---"+pageEnd+"---"+taxon+"---"+link);
- documents.put(docID,docList);
- }
- }
- }
- System.out.println("hop");
-
-
-
- for (String docId:documents.keySet()){
- in = new BufferedReader(new InputStreamReader(new URL(plaziUrlDoc+docId).openStream()));
- while ((inputLine = in.readLine()) != null) {
- if (inputLine.startsWith("<treatment ")){
- taxon = inputLine.split("taxon=\"")[1].split("\"")[0];
- docID=inputLine.split("docId=\"")[1].split("\"")[0];
- link=inputLine.split("link=\"")[1].split("\"")[0];
- pageStart = inputLine.split("startPage=\"")[1].split("\"")[0];
- pageEnd = inputLine.split("endPage=\"")[1].split("\"")[0];
- docList = documents.get(docID);
- if (docList == null) {
- docList = new ArrayList<String>();
- }
- docList.add(pageStart+"---"+pageEnd+"---"+taxon+"---"+link);
- docs.put(docID,docList);
- }
- }
- }
- // if(plaziNotServer) {
- // sourcesStr.add(plaziUrl);
- // }
- // in.close();
- } catch (MalformedURLException e1) {
- // TODO Auto-generated catch block
+ Map<String,List<String>> documents = fillDocumentMap(filterType, filterList, urlstr);
+
+// checkTreatmentAvailable(documents, docs);
+ docs = documents;
+
+ } catch (Exception e1) {
e1.printStackTrace();
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
}
- // System.exit(0);
-
// sourcesStr.add("/home/pkelbert/Documents/Proibiosphere/ChenopodiumXML/1362148061170_Chenopodium_K_hn_U_1993_tx.xml");
//System.out.println(documents);
}
+ /**
+  * Re-queries the Plazi service once per document id found in {@code documents}
+  * (URL: {@code plaziUrlDoc + docId}) and records the treatments of the response
+  * in {@code docs}.
+  * <p>
+  * Only response lines starting with {@code <treatment } are evaluated. For each of them an
+  * entry of the form {@code startPage---endPage---taxon---link} is appended to the list
+  * registered for its {@code docId}, and that list is stored in {@code docs}.
+  * A treatment line lacking one of the expected attributes makes the attribute lookup
+  * fail with an {@link ArrayIndexOutOfBoundsException}.
+  *
+  * @param documents the document ids to look up (keys); the value lists are reused and extended
+  * @param docs receives one list of treatment entries per document id
+  * @throws IOException if a response cannot be opened or read
+  * @throws MalformedURLException if {@code plaziUrlDoc + docId} is not a valid URL
+  */
+ private static void checkTreatmentAvailable(Map<String, List<String>> documents, Map<String, List<String>> docs)
+         throws IOException, MalformedURLException {
+     for (String docId : documents.keySet()){
+         URL url = new URL(plaziUrlDoc + docId);
+         BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream()));
+         try {
+             String inputLine;
+             while ((inputLine = in.readLine()) != null) {
+                 if (inputLine.startsWith("<treatment ")){
+                     String taxon = inputLine.split("taxon=\"")[1].split("\"")[0];
+                     String docID = inputLine.split("docId=\"")[1].split("\"")[0];
+                     String link = inputLine.split("link=\"")[1].split("\"")[0];
+                     String pageStart = inputLine.split("startPage=\"")[1].split("\"")[0];
+                     String pageEnd = inputLine.split("endPage=\"")[1].split("\"")[0];
+                     // NOTE(review): the list is looked up in 'documents' but stored in 'docs', so
+                     // entries already present in 'documents' get the re-fetched treatments appended.
+                     // Confirm this is intended and that docs.get(docID) was not meant.
+                     List<String> docList = documents.get(docID);
+                     if (docList == null) {
+                         docList = new ArrayList<String>();
+                     }
+                     docList.add(pageStart + "---" + pageEnd + "---" + taxon + "---" + link);
+                     docs.put(docID, docList);
+                 }
+             }
+         } finally {
+             // release the HTTP stream of this document even when reading or parsing fails
+             in.close();
+         }
+     }
+ }
+
+ /**
+  * Queries the Plazi search service once per entry of {@code filterList} and groups the
+  * treatments found in the responses by document id.
+  * <p>
+  * Every response line is echoed to stdout. Only lines starting with {@code <treatment }
+  * are evaluated; each contributes one entry of the form
+  * {@code startPage---endPage---taxon---link} to the list stored under its {@code docId}.
+  * A treatment line lacking one of these attributes makes the attribute lookup fail with
+  * an {@link ArrayIndexOutOfBoundsException}.
+  *
+  * @param filterType {@code MODS} appends the filter to {@code plaziUrlDoc}, {@code TAXON}
+  *            appends it to {@code plaziUrl}
+  * @param filterList the MODS document ids or taxon names to search for
+  * @param urlstr URL used as-is when {@code filterType} is neither {@code MODS} nor
+  *            {@code TAXON}; otherwise overwritten for every filter
+  * @return map from document id to its treatment entries; empty if nothing was found
+  * @throws MalformedURLException if a query URL is not valid
+  * @throws IOException if a response cannot be opened or read
+  */
+ private static Map<String, List<String>> fillDocumentMap(FilterType filterType,
+         String[] filterList, String urlstr)
+         throws MalformedURLException, IOException {
+
+     Map<String, List<String>> documents = new HashMap<String, List<String>>();
+     for (String filter : filterList){
+         if (filterType == FilterType.MODS) {
+             urlstr = plaziUrlDoc + filter;
+         }else if (filterType == FilterType.TAXON) {
+             urlstr = plaziUrl + filter;
+         }
+         log.info("URLstr: " + urlstr);
+
+         URL plaziURL = new URL(urlstr);
+         BufferedReader in = new BufferedReader(new InputStreamReader(plaziURL.openStream()));
+         try {
+             //TODO lastUpdate field
+             String inputLine;
+             while ((inputLine = in.readLine()) != null) {
+                 System.out.println(inputLine);
+                 if (inputLine.startsWith("<treatment ")){
+                     String taxon = inputLine.split("taxon=\"")[1].split("\"")[0];
+                     String docID = inputLine.split("docId=\"")[1].split("\"")[0];
+                     System.out.println("docID: "+docID);
+
+                     String link = inputLine.split("link=\"")[1].split("\"")[0];
+                     String pageStart = inputLine.split("startPage=\"")[1].split("\"")[0];
+                     String pageEnd = inputLine.split("endPage=\"")[1].split("\"")[0];
+                     List<String> docList = documents.get(docID);
+                     if (docList == null) {
+                         docList = new ArrayList<String>();
+                     }
+                     docList.add(pageStart + "---" + pageEnd + "---" + taxon + "---" + link);
+                     documents.put(docID, docList);
+                 }
+             }
+         } finally {
+             // release the HTTP stream of this query even when reading or parsing fails
+             in.close();
+         }
+     }
+     System.out.println("documents created");
+
+     return documents;
+ }
+
/**
* @param document
* @return
*/
private static boolean doImportDocument(String document, int nbtreatments) {
+
if (nbtreatments>400) {
return false;
}