1
|
<?php
|
2
|
|
3
|
/**
|
4
|
* This script converts the terms of the
|
5
|
* "World Geographical Scheme for recording plant distributions"
|
6
|
* available as text delimited files from
|
7
|
* http://www.nhm.ac.uk/hosted_sites/tdwg/geo2.htm into
|
8
|
* a taxonomy.xml file which can be imported into Drupal5
|
9
|
* as a vocabulary.
|
10
|
* The resulting xml file can be imported into Drupal by
|
11
|
* the taxonomy_xml module (http://drupal.org/project/taxonomy_xml).
|
12
|
*
|
13
|
* Since the "World Geographical Scheme" consists of 4 separate tables
|
14
|
* each for one level of geographical detail this script takes 4 steps to import
|
15
|
* the corresponding text delimited files.
|
16
|
*
|
17
|
* Direct download link to the text delimited files packed as a zip archive:
|
18
|
* http://www.nhm.ac.uk/hosted_sites/tdwg/geography_ed2.zip
|
19
|
*
|
20
|
* USAGE INSTRUCTIONS
|
21
|
* =======================================
|
22
|
* 1. Download geography_ed2.zip from the URL noted above
|
23
|
* 2. Edit the variable $zip_file
|
24
|
* 3. Run the script from your browser.
|
25
|
*
|
26
|
* Copyright (C) 2007 EDIT
|
27
|
* European Distributed Institute of Taxonomy
|
28
|
* http://www.e-taxonomy.eu
|
29
|
*
|
30
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
31
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
32
|
*/
|
33
|
|
34
|
|
35
|
// Path to the downloaded TDWG archive (see the usage instructions in the file header).
$zip_file = "geography_ed2.zip";

// Archive members ordered by hierarchy level: index 0 is read for level 1, index 3 for level 4.
$termfiles = array('tblLevel1.txt', 'tblLevel2.txt', 'tblLevel3.txt', 'tblLevel4.txt');

// Vocabulary id; written into the <vid> element of the header below and copied onto every term.
$vid = 20;

//-------------------------------------------------------------------------

// Opening part of the generated document: XML declaration, doctype and the
// properties of the vocabulary itself. The <term> elements are printed after it.
$xml_header = '<?xml version="1.0" standalone="no"?>
<!DOCTYPE taxonomy SYSTEM "taxonomy.dtd">
<vocabulary>
<vid>'.$vid.'</vid>
<name>expertdb_georegions</name>
<description>Geographical region terms compatible with the TDWG GeographicalRegions LSID vocabulary (http://rs.tdwg.org/ontology/voc/GeographicRegion.rdf).
The regions are structured in a hierarchy having four levels of detail.</description>
<help></help>
<relations>0</relations>
<hierarchy>1</hierarchy>
<multiple>1</multiple>
<required>0</required>
<nodes></nodes>
<weight>0</weight>';

// Closes the <vocabulary> element opened in $xml_header.
$xml_footer = '</vocabulary>';
|
59
|
|
60
|
/**
 * One vocabulary term, holding the fields that are written to the taxonomy XML.
 *
 * tid, vid and name have no default; the remaining fields default to an empty
 * string (description, synonyms) or 0 (weight, depth, parent).
 */
class Term{

    public $tid, $vid, $name, $description = '', $synonyms = '', $weight = 0, $depth = 0, $parent = 0;

    /**
     * Prints this term as a single <term> element followed by a newline.
     *
     * All field values are XML-escaped first, so characters such as '&' or '<'
     * in a name, description or synonym cannot break the generated document.
     */
    function print_xml(){
        // NOTE(review): the original carried the comment "is currently omitted"
        // here without saying what is omitted - confirm and document or remove.
        print (
            '<term>'
            .'<tid>'.self::xml_escape($this->tid).'</tid>'
            .'<vid>'.self::xml_escape($this->vid).'</vid>'
            .'<name>'.self::xml_escape($this->name).'</name>'
            .'<description>'.self::xml_escape($this->description).'</description>'
            .'<synonyms>'.self::xml_escape($this->synonyms).'</synonyms>'
            .'<weight>'.self::xml_escape($this->weight).'</weight>'
            .'<depth>'.self::xml_escape($this->depth).'</depth>'
            .'<parent>'.self::xml_escape($this->parent).'</parent>'
            ."</term>\n"
        );
    }

    /**
     * Returns $value as a string that is safe to place inside an XML element.
     *
     * @param mixed $value scalar field value
     * @return string the escaped text
     */
    private static function xml_escape($value){
        return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE | ENT_XML1, 'UTF-8');
    }

}
|
70
|
|
71
|
/**
 * Parses one '*'-delimited record and registers it as a Term in the global
 * $terms array.
 *
 * The new term takes its tid from the global counter $tid (which is incremented
 * afterwards), its vid from the global $vid and its depth from the global
 * $depth. It is stored under the key 'L<depth>_<code>'. The parent is looked up
 * under 'L<depth - 1>_<parent code>', so the parent level has to be loaded
 * before its children. Name, synonyms and description are converted from
 * latin1 to UTF-8.
 *
 * @param string $line one record, fields separated by '*'
 * @param int $code_idx index of the field holding the term's own code
 * @param int $name_idx index of the field holding the term name
 * @param int|false $parentcode_idx index of the field holding the parent's code,
 *   false when the record has no parent
 * @param int|false $synonym_idx index of the field used as synonym, false for none
 * @param int|false $description_idx index of the field used as description, false for none
 * @return false|null false when the line has fewer than two fields and was
 *   skipped; nothing otherwise
 */
function addTerm($line, $code_idx, $name_idx, $parentcode_idx = false, $synonym_idx = false, $description_idx = false){
    global $depth, $vid, $tid, $terms;

    // Trim every field: the files are split on "\n" only, so with CRLF line
    // endings the last field of each record would otherwise keep a trailing "\r".
    $tok = array_map('trim', explode('*', $line));

    // Blank or malformed lines do not yield a code and a name.
    if(count($tok) < 2){
        return false;
    }

    $term = new Term();
    $term->tid = $tid++;
    $term->vid = $vid;
    $term->depth = $depth;

    $term->name = mb_convert_encoding($tok[$name_idx], 'UTF-8', 'latin1');

    if($parentcode_idx && isset($tok[$parentcode_idx])){
        $parent_key = 'L'.($depth - 1).'_'.$tok[$parentcode_idx];
        if(isset($terms[$parent_key])){
            $term->parent = $terms[$parent_key]->tid;
        }
        else{
            // Unknown parent code: keep the default parent (0) instead of
            // dereferencing a missing array entry, and leave a trace in the log.
            error_log('addTerm: no parent term "'.$parent_key.'" for record "'.$line.'"');
        }
    }

    if($synonym_idx && isset($tok[$synonym_idx])){
        $term->synonyms = mb_convert_encoding($tok[$synonym_idx], 'UTF-8', 'latin1');
    }

    if($description_idx && isset($tok[$description_idx])){
        $term->description = mb_convert_encoding($tok[$description_idx], 'UTF-8', 'latin1');
    }

    $terms['L'.$depth.'_'.$tok[$code_idx]] = $term;

}
|
116
|
|
117
|
// open zip file

$zip = new ZipArchive;

// ZipArchive::open() returns true on success and an integer error code on
// failure - never false - so success must be tested strictly against true.
if ($zip->open($zip_file) !== true) {
    print('ERROR: invalid variable $zip_file: ['.$zip_file.'] Please read the instructions in the php script.');
    exit(-1);
}

// read all terms as Term instances into an associative array using the code fields as key
$terms = array();

// first term id to hand out; addTerm() increments it for every term it creates
$tid = 20000;

// LEVEL 0:
// root term 'Terrestrial'
$depth = 0;
addTerm('_ROOT_*Terrestrial', 0, 1);

// Column indexes handed to addTerm() for each level, as
// array(parent code, synonym, description). Code and name are always columns 0 and 1.
//   LEVEL 1: L1 code*L1 continent              (parent '_ROOT_' is appended below as column 2)
//   LEVEL 2: L2 code*L2 region*L1 code*L2 ISOcode
//   LEVEL 3: L3 code*L3 area*L2 code*L3 ISOcode*Ed2status*Notes
//   LEVEL 4: L4 code*L4 country*L3 code*L4 ISOcode*Ed2status*Notes
$level_columns = array(
    1 => array(2, false, false),
    2 => array(2, 3, false),
    3 => array(2, 3, 5),
    4 => array(2, 3, 5),
);

foreach ($level_columns as $level => $columns) {
    // addTerm() reads the current level from the global $depth.
    $depth = $level;
    list($parent_idx, $synonym_idx, $description_idx) = $columns;

    $text = $zip->getFromName($termfiles[$depth - 1]);
    if ($text === false) {
        // Without this check a missing member would silently yield an empty level.
        print('ERROR: cannot read ['.$termfiles[$depth - 1].'] from ['.$zip_file.'].');
        $zip->close();
        exit(-1);
    }

    $lines = explode("\n", $text);
    $line_count = count($lines);

    // Index 0 is the column header row, so the records start at index 1.
    for ($i = 1; $i < $line_count; $i++) {
        // Trimming also removes the "\r" left behind by CRLF line endings.
        $line = trim($lines[$i]);
        if ($line === '') {
            continue;
        }
        if ($depth == 1) {
            // Level 1 records carry no parent column; attach them to the root term.
            $line .= '*_ROOT_';
        }
        addTerm($line, 0, 1, $parent_idx, $synonym_idx, $description_idx);
    }
}

$zip->close();

// A single Content-Type header: a second header() call with the same name
// would replace this one and drop the charset.
header("Content-Type: text/xml; charset=UTF-8");
print ($xml_header);
foreach ($terms as $t) {
    $t->print_xml();
}
print ($xml_footer);
|
193
|
|
194
|
?>
|