Fix: when re-importing data into existing tables truncate needs cascade due to foreig...
[postgresql/geographic_data] / paf_convert.pl
1 #!/usr/bin/perl
2 # Glyn Astill - 02/03/2014
3 # Script to automate cs2cs transform from esatings/northings to latitudes/longitudes
4 # for PAF files from Royal Mail
5
6 #use diagnostics;
7 use strict;
8 use warnings;
9 use Getopt::Long qw/GetOptions/;
10 use Geo::Proj4;
11
12 use constant false => 0;
13 use constant true  => 1;
14
15 my $error;
16 my $usage = '-i <codepoint csv file input> -o <latlong csv file output>';
17 my $infile;
18 my $outfile;
19 my @latlong;
20 my $linecount;
21 my @infiles;
22 my @fields_fixed;
23 my $start_run = time();
24 my $end_run;
25 my $run_time;
26 my $pipe_cs2cs = true;
27 my $proj_ng;
28 my $proj_ig;
29
30 # Handle command line options
31 Getopt::Long::Configure('no_ignore_case');
32 use vars qw{%opt};
33 die $usage unless GetOptions(\%opt, 'infile|i=s', 'outfile|o=s', ) and keys %opt and ! @ARGV;
34
35 if (!defined($opt{infile})) {
36         print("Please specify an input file.\n");
37         die $usage;
38 }
39 else {
40         $infile = $opt{infile};
41 }
42 if (!defined($opt{outfile})) {
43         print("Please specify an output file.\n");
44         die $usage;
45 }
46 else {
47         $outfile = $opt{outfile};
48 }
49
50 if (!$pipe_cs2cs) {
51         # I'm not positive that the below is setting a the Bursa Wolf or another parameter correctly, hence loss in accuracy.
52         # it's possible to set the parameters individually i.e. "Geo::Proj4->new(proj => "tmerc", ellps => "airy", lat_0 => -49)" which may solve
53         $proj_ng = Geo::Proj4->new("-f '%.7f' +proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +towgs84=446.448,-125.157,542.060,0.1502,0.2470,0.8421,-20.4894 +units=m +no_defs +to +proj=latlong +ellps=WGS84 +towgs84=0,0,0 +nodefs")
54                 or die "parameter error: ".Geo::Proj4->error. "\n";
55         $proj_ig = Geo::Proj4->new("-f '%.7f' +proj=tmerc +lat_0=53.5 +lon_0=-8 +k=1.000035 +x_0=200000 +y_0=250000 +ellps=airy +towgs84=482.530,-130.596,564.557,-1.042,-0.214,-0.631,8.15 +units=m +no_defs +to +proj=latlong +ellps=WGS84 +towgs84=0,0,0 +nodefs")
56                 or die "parameter error: ".Geo::Proj4->error. "\n";
57 }
58
59 # Read the codepoint file with eastings and northings, convert and output to outfile
60 # Open our output file which will contain the eastings/northings and longitudes/latitudes
61 if (open(OUTFILE, ">", $outfile)) {
62         
63         # get a list of our files 
64         $infile =~ s/ /\\ /g;
65         @infiles = glob("$infile");
66
67         foreach my $currfile (@infiles) {
68                 # read in the file
69                 $linecount = 0; 
70                 print ("Processing $currfile ..");
71                 unless (open(INFILE, "<", $currfile)) {
72                         print("ERROR: Could not open file:" . $! . ".\n");
73                 }
74                 while (<INFILE>) {
75                         @fields_fixed = unpack('a4 a3 a6 a5 a5 a9 a9 a9 a9 a9 a9 a1 a1', $_);
76
77                         eval {
78                                 # Deal with 6 digit northings
79                                 for ($fields_fixed[4]) {
80                                         s/[PO]/12/;
81                                         s/[UT]/11/;
82                                         s/[ZY]/10/;
83                                 }
84                                 if ($fields_fixed[4] =~ m/\s/) {
85                                         $fields_fixed[4] = sprintf("%-6s", $fields_fixed[4]) . " ";
86                                         $fields_fixed[3] .= ' ';
87                                 }
88                                 else {
89                                         $fields_fixed[4] = sprintf("%06d", $fields_fixed[4]) . "0";
90                                         $fields_fixed[3] .= '0';
91                                 }
92                                 
93                                 for (my $field = 0; $field <= 12; $field++) {
94                                         print OUTFILE ($fields_fixed[$field]);
95                                 }
96                                 
97                                 if ($fields_fixed[4] !~ m/\s/) {
98                                         # Irish Grid
99                                         if ($fields_fixed[0] =~ m/^BT/i) {
100                                                 if ($pipe_cs2cs) {
101                                                         open(CS2CS,"echo $fields_fixed[3] $fields_fixed[4] | cs2cs -f '%.7f' +proj=tmerc +lat_0=53.5 +lon_0=-8 +k=1.000035 +x_0=200000 +y_0=250000 +ellps=airy +towgs84=482.530,-130.596,564.557,-1.042,-0.214,-0.631,8.15 +units=m +no_defs +to +proj=latlong +ellps=WGS84 +towgs84=0,0,0 +nodefs |");
102                                                 }
103                                                 else {
104                                                         @latlong = $proj_ig->inverse($fields_fixed[3], $fields_fixed[4]);
105                                                 }                                               
106                                         }
107                                         # National Grid
108                                         else {
109                                                 if ($pipe_cs2cs) {
110                                                         open(CS2CS,"echo $fields_fixed[3] $fields_fixed[4] | cs2cs -f '%.7f' +proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +towgs84=446.448,-125.157,542.060,0.1502,0.2470,0.8421,-20.4894 +units=m +no_defs +to +proj=latlong +ellps=WGS84 +towgs84=0,0,0 +nodefs |");
111                                                 }
112                                                 else {
113                                                         @latlong = $proj_ng->inverse($fields_fixed[3], $fields_fixed[4]);
114                                                 }
115                                         }
116
117                                         if ($pipe_cs2cs) {
118                                                 @latlong =  split(' ', <CS2CS>);
119                                                 close CS2CS;
120                                                 print OUTFILE (sprintf("%010f", $latlong[1]) . sprintf("%010f", $latlong[0]));
121                                         }
122                                         else {                                  
123                                                 print OUTFILE (sprintf("%010f", $latlong[0]) . sprintf("%010f", $latlong[1]));
124                                         }
125                                         
126                                 }
127                                 else {
128                                         print OUTFILE "                    ";
129                                 }
130                                 
131                                 print OUTFILE ("\n");
132                                 $linecount++;
133                                 if (($linecount%10000) == 0) {
134                                         print ("..$linecount");
135                                 }
136                         };
137                         if ($@) {
138                                 print("ERROR: Could not run command:" . $! . " Line = " . $_ . "\n");
139                                 print("$fields_fixed[3] $fields_fixed[4]");
140                         }
141                         
142                 }
143                 close (INFILE);
144                 print ("..OK\n");
145         }
146
147         close (OUTFILE);
148 }
149 else {
150         print("ERROR: Could not open file:" . $! . ".\n");
151 }
152
153 $end_run = time();
154 $run_time = (($end_run-$start_run)/60);
155
156 print "Conversion took $run_time minutes\n";
157 if ($pipe_cs2cs) {
158         print "To run quicker use Geo:proj4 (appears to be less accurate - a bug in my usage?) or stream the data directly to cs2cs in one go rather than constantly calling and opening the commands output\n";
159 }
160
161 exit(0);