Project

General

Profile

Download (5.46 KB) Statistics
| Branch: | Tag: | Revision:
1
<?php
2

    
3
/**
4
 * This script converts the terms of the 
5
 * "World Geographical Scheme for recording plant distributions" 
6
 * available as text delimited files from
7
 * http://www.nhm.ac.uk/hosted_sites/tdwg/geo2.htm into
8
 * a taxonomy.xml file which can be imported into Drupal 5
9
 * as a vocabulary. 
10
 * The resulting xml file can be imported into Drupal by 
11
 * the taxonomy_xml module (http://drupal.org/project/taxonomy_xml).
12
 * 
13
 * Since the "World Geographical Scheme" consists of 4 separate tables
14
 * each for one level of geographical detail this script takes 4 steps to import 
15
 * the according text delimited files.
16
 * 
17
 * Direct download link to the text delimited files packed as a zip archive:
18
 * http://www.nhm.ac.uk/hosted_sites/tdwg/geography_ed2.zip
19
 * 
20
 * USAGE INSTRUCTIONS
21
 * =======================================
22
 * 1. Download geography_ed2.zip from the URL noted above
23
 * 2. Edit the variable $zip_file
24
 * 3. Run the script from your browser.
25
 * 
26
 * Copyright (C) 2007 EDIT
27
 * European Distributed Institute of Taxonomy 
28
 * http://www.e-taxonomy.eu
29
 * 
30
 * The contents of this file are subject to the Mozilla Public License Version 1.1
31
 * See LICENSE.TXT at the top of this package for the full license terms.
32
 */
33

    
34

    
35
// Path of the zip archive that is opened by this script (see the usage
// instructions in the file header).
$zip_file = "geography_ed2.zip";

// Names of the text files read from the archive; entry [n - 1] is used for
// hierarchy level n.
$termfiles = array('tblLevel1.txt', 'tblLevel2.txt', 'tblLevel3.txt', 'tblLevel4.txt');

// Vocabulary id; written into the vocabulary header and into every term.
$vid = 20;

//-------------------------------------------------------------------------

// Opening part of the generated document: XML declaration, doctype and the
// vocabulary metadata. The term elements are printed directly after it.
$xml_header = '<?xml version="1.0" standalone="no"?>
<!DOCTYPE taxonomy SYSTEM "taxonomy.dtd">
<vocabulary>
<vid>'.$vid.'</vid>
<name>expertdb_georegions</name>
<description>Geographical region terms compatible with the TDWG GeographicalRegions LSID vocabulary (http://rs.tdwg.org/ontology/voc/GeographicRegion.rdf).
The regions are structured in a hierarchy having four levels of detail.</description>
<help></help>
<relations>0</relations>
<hierarchy>1</hierarchy>
<multiple>1</multiple>
<required>0</required>
<nodes></nodes>
<weight>0</weight>';

// Closing part of the generated document, printed after the last term.
$xml_footer = '</vocabulary>';
59

    
60
/**
 * One vocabulary term as it is written to the output document.
 *
 * All fields are plain public properties; $tid, $vid and $name have no
 * default, the remaining fields default to '' or 0.
 */
class Term{

  var $tid, $vid, $name, $description = '', $synonyms = '', $weight = 0, $depth = 0, $parent = 0;

  /**
   * Prints this term as one <term> element on a single line, followed by a
   * newline. The child elements are emitted in the order tid, vid, name,
   * description, synonyms, weight, depth, parent.
   *
   * Field values are escaped so that '&', '<' and '>' inside a name,
   * synonym or description cannot produce malformed XML. Quotes are left
   * untouched because the values are only used as element content.
   */
  function print_xml(){
    // NOTE(review): the original comment at this point read
    // "is currently ommittted !!!" with its subject missing - presumably an
    // element name was lost; confirm which element is intentionally left out.
    $fields = array('tid', 'vid', 'name', 'description', 'synonyms', 'weight', 'depth', 'parent');

    $xml = '<term>';
    foreach ($fields as $field) {
      // The string cast keeps unset (null) fields printing as empty elements.
      $xml .= '<'.$field.'>'.htmlspecialchars((string) $this->$field, ENT_NOQUOTES, 'UTF-8').'</'.$field.'>';
    }
    $xml .= "</term>\n";

    print ($xml);
  }

}
70

    
71
/**
 * Parses one '*'-delimited record and registers it as a Term in the global
 * $terms array.
 *
 * The term is stored under the key 'L<depth>_<code>', where <depth> is the
 * current value of the global $depth and <code> is the field at $code_idx.
 * Every accepted record takes the current value of the global $tid as its
 * id and increments that counter. Name, synonyms and description are
 * converted from latin1 to UTF-8.
 *
 * The parent is looked up in $terms under 'L<depth - 1>_<parent code>'. If
 * the parent column or the parent term is missing, the term keeps the
 * default parent 0 instead of triggering an undefined index / null object
 * access.
 *
 * @param string    $line            one record, fields separated by '*'
 * @param int       $code_idx        index of the field holding the term code
 * @param int       $name_idx        index of the field holding the term name
 * @param int|false $parentcode_idx  index of the field holding the parent code;
 *                                   any falsy value (false, 0) means "no parent"
 * @param int|false $synonym_idx     index of the field copied into synonyms;
 *                                   falsy or out of range leaves synonyms ''
 * @param int|false $description_idx index of the field copied into description;
 *                                   falsy or out of range leaves description ''
 * @return bool true when a term was added, false when the record was skipped
 *              because it has fewer than two fields or lacks the code/name field
 */
function addTerm($line, $code_idx, $name_idx, $parentcode_idx = false, $synonym_idx = false, $description_idx = false){
  global $depth, $vid, $tid, $terms;

  // Callers split the file content on "\n" only, so a line may still end in
  // "\r" if the file uses CRLF line endings; drop it so it cannot end up in
  // the last field (or in a lookup key).
  $tok = explode('*', rtrim($line, "\r\n"));

  if(count($tok) < 2 || !isset($tok[$code_idx]) || !isset($tok[$name_idx])){
     return false;
  }

  $term = new Term();
  $term->tid = $tid++;
  $term->vid = $vid;
  $term->depth = $depth;

  $term->name = mb_convert_encoding($tok[$name_idx], 'UTF-8', 'latin1');

  if($parentcode_idx && isset($tok[$parentcode_idx])){
    // Parents live one level up and are keyed by their own code.
    $parentkey = 'L'.($depth - 1).'_'.$tok[$parentcode_idx];
    if(isset($terms[$parentkey])){
      $term->parent = $terms[$parentkey]->tid;
    }
  }

  if($synonym_idx && count($tok) > $synonym_idx){
    $term->synonyms = mb_convert_encoding($tok[$synonym_idx], 'UTF-8', 'latin1');
  }

  if($description_idx && count($tok) > $description_idx){
    $term->description = mb_convert_encoding($tok[$description_idx], 'UTF-8', 'latin1');
  }

  $terms['L'.$depth.'_'.$tok[$code_idx]] = $term;

  return true;
}
116

    
117
// open zip file
$zip = new ZipArchive;

// ZipArchive::open() returns true on success and an integer error code on
// failure - it never returns false, so the result must be compared to true.
if ($zip->open($zip_file) !== true) {
  print('ERROR: invalid variable $zip_file: ['.$zip_file.'] Please read the instructions in the php script.');
  exit(-1);
}
125

    
126
// Read all terms as Term instances into an associative array; addTerm()
// stores each one under the key 'L<depth>_<code>'.
$terms = array();

// Id given to the first term; addTerm() increments it for every term added.
$tid = 20000;

// LEVEL 0:
// 	root term 'Terrestrial'
$depth = 0;
addTerm('_ROOT_*Terrestrial', 0, 1);

// Column layout of the four level files. Code, name and parent code are
// always fields 0, 1 and 2; the table lists which field (if any) is used
// for synonyms and description.
// LEVEL 1: L1 code*L1 continent
//          (no parent column: '*_ROOT_' is appended so field 2 points at the root term)
// LEVEL 2: L2 code * L2 region * L1 code * L2 ISOcode
// LEVEL 3: L3 code * L3 area * L2 code * L3 ISOcode * Ed2status*Notes
// LEVEL 4: L4 code*L4 country*L3 code*L4 ISOcode*Ed2status*Notes
$level_columns = array(
  1 => array('synonym' => false, 'description' => false),
  2 => array('synonym' => 3,     'description' => false),
  3 => array('synonym' => 3,     'description' => 5),
  4 => array('synonym' => 3,     'description' => 5),
);

foreach ($level_columns as $level => $columns) {
  // addTerm() reads the current level from the global $depth.
  $depth = $level;

  $text = $zip->getFromName($termfiles[$depth - 1]);
  if ($text === false) {
    // getFromName() returns false when the entry cannot be read.
    print('ERROR: cannot read ['.$termfiles[$depth - 1].'] from ['.$zip_file.']');
    exit(-1);
  }
  $lines = explode("\n", $text);

  // Line 0 is skipped - presumably the column header row.
  for ($i = 1, $line_count = count($lines); $i < $line_count; $i++) {
    // Trim on every level so a trailing "\r" or blank line never reaches addTerm().
    $l = trim($lines[$i]);
    if (strlen($l) == 0) {
      continue;
    }
    if ($depth == 1) {
      $l .= '*_ROOT_';
    }
    addTerm($l, 0, 1, 2, $columns['synonym'], $columns['description']);
  }
}
182

    
183

    
184
$zip->close();

// Send the content type exactly once, with the charset. header() replaces a
// previously set header of the same name by default, so a second plain
// "Content-Type: text/xml" call would drop the charset again.
header("Content-Type: text/xml; charset=UTF-8");

// Document = header, one <term> element per collected term, footer.
print ($xml_header);
foreach ($terms as $t) {
  $t->print_xml();
}
print ($xml_footer);
193

    
194
?>
(1-1/7)