package eu.etaxonomy.cdm.io.iapt;
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
+import eu.etaxonomy.cdm.api.service.pager.Pager;
import eu.etaxonomy.cdm.common.CdmUtils;
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
Pattern.compile("^(?<monthName>\\p{L}+\\.?)\\s(?<day>[0-9]{1,2})(?:st|rd|th)?\\.?,?\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like April 12, 1969 or april 12th 1999
Pattern.compile("^(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // April 99 or April, 1999 or Apr. 12
Pattern.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(\\s?)(?<month>[0-1]?[0-9])\\2\\3(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12.04.1969 or 12. 04. 1969 or 12/04/1969 or 12-04-1969
- Pattern.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(?<month>[IVX]{1,2})\\2(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12-VI-1969
- Pattern.compile("^(?:(?<day>[0-9]{1,2})(?:\\sde)\\s)(?<monthName>\\p{L}+)\\sde\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full and partial date like 12 de Enero de 1999 or Enero de 1999
+ Pattern.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(?<monthName>[IVX]{1,2})\\2(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12-VI-1969
+ Pattern.compile("^(?:(?<day>[0-9]{1,2})(?:\\sde)\\s)?(?<monthName>\\p{L}+)\\sde\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full and partial date like 12 de Enero de 1999 or Enero de 1999
Pattern.compile("^(?<month>[0-1]?[0-9])([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like 04.1969 or 04/1969 or 04-1969
Pattern.compile("^(?<year>(?:1[7,8,9])?[0-9]{2})([\\.\\-/])(?<month>[0-1]?[0-9])$"),// partial date like 1999-04
- Pattern.compile("^(?<month>[IVX]{1,2})([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like VI-1969
+ Pattern.compile("^(?<monthName>[IVX]{1,2})([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like VI-1969
Pattern.compile("^(?<day>[0-9]{1,2})(?:[\\./]|th|rd|st)?\\s(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12. April 1969 or april 1999 or 22 Dec.1999
};
- private static final Pattern typeSpecimenSplitPattern = Pattern.compile("^(?:\"*[Tt]ype: (?<fieldUnit>.*?))(?:[Hh]olotype:(?<holotype>.*?)\\.?)?(?:[Ii]sotype[^:]*:(?<isotype>.*)\\.?)?\\.?$");
+ private static final Pattern typeSpecimenSplitPattern = Pattern.compile("^(?:\"*[Tt]ype: (?<fieldUnit>.*?))(?:[Hh]olotype:(?<holotype>.*?)\\.?)?(?:[Ii]sotype.*?[:\\(](?<isotype>.*)\\.?)?\\.?$");
private static final Pattern typeNameBasionymPattern = Pattern.compile("\\([Bb]asionym\\s?\\:\\s?(?<basionymName>[^\\)]*).*$");
private static final Pattern typeNameNotePattern = Pattern.compile("\\[([^\\[]*)"); // matches the inner of '[...]'
private static final Pattern[] specimenTypePatterns = new Pattern[]{
Pattern.compile("^(?<colCode>[A-Z]+|CPC Micropaleontology Lab\\.?)\\s+(?:\\((?<institute>.*[^\\)])\\))(?<accNumber>.*)?$"), // like: GAUF (Gansu Agricultural University) No. 1207-1222
Pattern.compile("^(?<colCode>[A-Z]+|CPC Micropaleontology Lab\\.?)\\s+(?:Coll\\.\\s(?<subCollection>[^\\.,;]*)(.))(?<accNumber>.*)?$"), // like KASSEL Coll. Krasske, Praep. DII 78
- Pattern.compile("^(?:Coll\\.\\s(?<subCollection>[^\\.,;]*)(.))(?<institute>.*?)(?<accNumber>Praep\\..*)?$"), // like Coll. Lange-Bertalot, Bot. Inst., Univ. Frankfurt/Main, Germany Praep. Neukaledonien OTL 62
+ Pattern.compile("^(?:in\\s)?(?<institute>[Cc]oll\\.\\s.*?)(?:\\s+(?<accNumber>(Praep\\.|slide|No\\.|Inv\\. Nr\\.|Nr\\.).*))?$"), // like Coll. Lange-Bertalot, Bot. Inst., Univ. Frankfurt/Main, Germany Praep. Neukaledonien OTL 62
+ Pattern.compile("^(?<institute>Inst\\.\\s.*?)\\s+(?<accNumber>N\\s.*)?$"), // like Inst. Geological Sciences, Acad. Sci. Belarus, Minsk N 212 A
Pattern.compile("^(?<colCode>[A-Z]+)(?:\\s+(?<accNumber>.*))?$"), // identifies the Collection code and takes the rest as accessionNumber if any
};
private Reference bookVariedadesTradicionales = null;
+ /**
+ * HACK for simple unit testing
+ */
+ boolean _testMode = System.getProperty("TEST_MODE") != null;
+
private Taxon makeTaxon(HashMap<String, String> record, SimpleExcelTaxonImportState<CONFIG> state,
TaxonNode higherTaxonNode, boolean isFossil) {
Partial pupDate = null;
boolean restoreOriginalReference = false;
+ boolean nameIsValid = true;
// preprocess nomRef: separate citation, reference detail, publishing date
if(!StringUtils.isEmpty(nomRefStr)){
if(!StringUtils.isEmpty(notesTxt)){
notesTxt = notesTxt.replace("Notes: ", "").trim();
taxonName.addAnnotation(Annotation.NewInstance(notesTxt, AnnotationType.EDITORIAL(), Language.DEFAULT()));
+ nameIsValid = false;
+
}
if(!StringUtils.isEmpty(caveats)){
caveats = caveats.replace("Caveats: ", "").trim();
taxonName.addAnnotation(Annotation.NewInstance(caveats, annotationTypeCaveats(), Language.DEFAULT()));
+ nameIsValid = false;
+ }
+
+ if(nameIsValid){
+ // Status is always considered valid if no notes and no caveats are set
+ taxonName.addStatus(NomenclaturalStatus.NewInstance(NomenclaturalStatusType.VALID()));
}
getNameService().save(taxonName);
}
getTaxonService().save(taxon);
+
+ if(taxonName.getRank().equals(Rank.SPECIES()) || taxonName.getRank().isLower(Rank.SPECIES())){
+ // try to find the genus; it should have been imported already, because genera come first in the import file
+ Taxon genus = ((IAPTImportState)state).getGenusTaxonMap().get(taxonName.getGenusOrUninomial());
+ if(genus != null){
+ higherTaxonNode = genus.getTaxonNodes().iterator().next();
+ } else {
+ logger.info(csvReportLine(regNumber, "Parent genus not found for", nameStr));
+ }
+ }
+
if(higherTaxonNode != null){
higherTaxonNode.addChildTaxon(taxon, null, null);
getTaxonNodeService().save(higherTaxonNode);
}
+ if(taxonName.getRank().isGenus()){
+ ((IAPTImportState)state).getGenusTaxonMap().put(taxonName.getGenusOrUninomial(), taxon);
+ }
+
return taxon;
}
return fieldUnit;
}
- private Partial parseDate(String regNumber, String dateStr) {
+ protected Partial parseDate(String regNumber, String dateStr) {
Partial pupDate = null;
boolean parseError = false;
* @param regNumber
* @return
*/
- private DerivedUnit parseSpecimenType(FieldUnit fieldUnit, TypesName typeName, Collection collection, String text, String regNumber) {
+ protected DerivedUnit parseSpecimenType(FieldUnit fieldUnit, TypesName typeName, Collection collection, String text, String regNumber) {
DerivedUnit specimen = null;
String collectionCode = null;
+ String collectionTitle = null;
String subCollectionStr = null;
String instituteStr = null;
String accessionNumber = null;
for (Pattern p : specimenTypePatterns) {
Matcher m = p.matcher(text);
if (m.matches()) {
- // collection code is mandatory
+ // collection code or collectionTitle is mandatory
try {
collectionCode = m.group("colCode");
} catch (IllegalArgumentException e){
// match group colCode not found
}
+
try {
- subCollectionStr = m.group("subCollection");
+ instituteStr = m.group("institute");
} catch (IllegalArgumentException e){
- // match group subCollection not found
+ // match group institute not found
}
+
try {
- instituteStr = m.group("institute");
+ subCollectionStr = m.group("subCollection");
} catch (IllegalArgumentException e){
- // match group col_name not found
+ // match group subCollection not found
}
try {
accessionNumber = m.group("accNumber");
for(String text : nameAnnotations.keySet()){
taxonName.addAnnotation(Annotation.NewInstance(text, nameAnnotations.get(text), Language.DEFAULT()));
}
- getNameService().save(taxonName);
}
+
+ taxonName.addSource(OriginalSourceType.Import, regNumber, null, state.getConfig().getSourceReference(), null);
+
+ getNameService().save(taxonName);
+
return taxonName;
}
collection.setSuperCollection(superCollection);
}
collectionMap.put(key, collection);
- getCollectionService().save(collection);
+ if(!_testMode) {
+ getCollectionService().save(collection);
+ }
}
return collection;
value = StringUtils.replace(value, "c$k", "č");
value = StringUtils.replace(value, " U$K", " Š");
+ value = StringUtils.replace(value, "O>U>!", "Ø");
+ value = StringUtils.replace(value, "o>!", "ø");
+ value = StringUtils.replace(value, "S$K", "Ŝ");
+ value = StringUtils.replace(value, ">l", "ğ");
+
+ value = StringUtils.replace(value, "§B>i", "ł");
+
+
+
return value;
}
((IAPTImportState)state).setCurrentTaxon(taxon);
+ logger.info("#of imported Genera: " + ((IAPTImportState) state).getGenusTaxonMap().size());
return;
}
} else if(name.matches("^Incertae sedis$|^No group assigned$")){
return rankFamilyIncertisSedis();
} else if(name.matches(".*phyta$|.*mycota$")){
- return Rank.SECTION_BOTANY();
+ return Rank.PHYLUM();
} else if(name.matches(".*phytina$|.*mycotina$")){
- return Rank.SUBSECTION_BOTANY();
+ return Rank.SUBPHYLUM();
} else if(name.matches("Gymnospermae$|.*ones$")){ // Monocotyledones, Dicotyledones
return rankUnrankedSupraGeneric();
} else if(name.matches(".*opsida$|.*phyceae$|.*mycetes$|.*ones$|^Musci$|^Hepaticae$")){