# Ticket #287: data_handler.py
#
# File data_handler.py, 25.8 KB (added by benj, 15 years ago)
#
# BIL file handler library (needed by dimfixer.py)
#
##############################################################################
# Python library to deal with BIL (Band Interleaved by Line) and BSQ (Band Sequential) files
#
# Author: Ben Taylor
#
# History:
# 12th Feb. 2009: (benj) Created
# 9th Jun. 2009: (benj) Added writeData function
# 20th Aug. 2009: (benj) Added append option to writeDataFile
#
# Available functions:
# readxy: Wrapper function for readBil/readBsq that allows you to omit the number of bands in the file (works it out from the file size)
# readxb: Wrapper function for readBil/readBsq that allows you to omit the number of lines in the file (works it out from the file size)
# readyb: Wrapper function for readBil/readBsq that allows you to omit the number of pixels per line (works it out from the file size)
# readBil: Reads a BIL file and returns a list containing the data from the file
# readBilLine: Reads a line from an open BIL file and returns a list containing the data that was read
# readBsq: Reads a BSQ file and returns a list containing the data from the file
# readBsqBand: Reads a band from an open BSQ file and returns a list containing the data that was read
# writeData: Writes data to an output file straight from an input list
# writeDataFile: Writes a BIL or BSQ file from a 1D list containing data already in the right file order
# write3DData: Writes a BIL or BSQ file from a 3D list with 1st dimension band number, 2nd dimension line number and 3rd dimension pixel number (unfolds to a 1D array then calls writeDataFile)
# writeHdrFile: Writes an ENVI .hdr file to be associated with a BIL or BSQ file
# readHdrFile: Reads data from a given ENVI-style header file
# getEnviType: Gets the ENVI type code equivalent to a particular Python struct format string
# getStructType: Gets the Python struct format string equivalent to a particular ENVI type code
#
# You may use or alter this script as you wish, but no warranty of any kind is offered, nor is it guaranteed
# not to cause security holes in an unsafe environment.
##############################################################################
30
import os
import stat
import struct
import re
import sys

# Default data format used by the readers/writers in this module when the caller does not
# supply one ("h" in Python struct notation: 2-byte signed short int)
defformat = "h"
38
# Function readxy
# Wrapper function for readBil/readBsq that allows you to omit the number of bands in the file
# (works it out from the file size)
#
# Arguments:
# filename: Name of file to read
# numlines: Number of lines of data in the file
# pixperline: Number of pixels on a line
# dataformat: Format string for data, as Python struct definition
# filetype: "bil" or "bsq" appropriately
#
# Returns: Whatever readBil/readBsq returns for the file (see those functions)
#
# Raises: ValueError if the format string is invalid, if the file size is not a whole number of
#   bands for the given attributes, or if filetype is neither "bil" nor "bsq"
def readxy(filename, numlines, pixperline, dataformat=defformat, filetype="bil"):
    filesize = os.stat(filename)[stat.ST_SIZE]

    # Check given format string is valid
    try:
        bytesperpix = struct.calcsize(dataformat)
    except (struct.error, TypeError):
        raise ValueError("Supplied data format " + str(dataformat) + " is invalid")

    # A zero dimension (or zero-size format) can never match a file, so report it the same way
    # as any other attribute mismatch rather than leaking a ZeroDivisionError
    try:
        numbands = ((filesize / float(numlines)) / float(pixperline)) / float(bytesperpix)
    except ZeroDivisionError:
        raise ValueError("File size and supplied attributes do not match")

    # Should be an integer, if it's not then one of the given attributes is wrong or the file is corrupt
    if numbands != int(numbands):
        raise ValueError("File size and supplied attributes do not match")

    if filetype == "bil":
        reader = readBil
    elif filetype == "bsq":
        reader = readBsq
    else:
        raise ValueError("File type argument must be either 'bil' or 'bsq', got: " + str(filetype))

    return reader(filename, int(numlines), int(pixperline), int(numbands), dataformat)
# end function
71
# Function readxb
# Wrapper function for readBil/readBsq that allows you to omit the number of lines in the file
# (works it out from the file size)
#
# Arguments:
# filename: Name of file to read
# pixperline: Number of pixels on a line
# numbands: Number of bands in the file
# dataformat: Format string for data, as Python struct definition
# filetype: "bil" or "bsq" appropriately
#
# Returns: Whatever readBil/readBsq returns for the file (see those functions)
#
# Raises: ValueError if the format string is invalid, if the file size is not a whole number of
#   lines for the given attributes, or if filetype is neither "bil" nor "bsq"
def readxb(filename, pixperline, numbands, dataformat=defformat, filetype="bil"):
    filesize = os.stat(filename)[stat.ST_SIZE]

    # Check given format string is valid
    try:
        bytesperpix = struct.calcsize(dataformat)
    except (struct.error, TypeError):
        raise ValueError("Supplied data format " + str(dataformat) + " is invalid")

    # A zero dimension (or zero-size format) can never match a file, so report it the same way
    # as any other attribute mismatch rather than leaking a ZeroDivisionError
    try:
        numlines = ((filesize / float(numbands)) / float(pixperline)) / float(bytesperpix)
    except ZeroDivisionError:
        raise ValueError("File size and supplied attributes do not match")

    # Should be an integer, if it's not then one of the given attributes is wrong or the file is corrupt
    if numlines != int(numlines):
        raise ValueError("File size and supplied attributes do not match")

    if filetype == "bil":
        reader = readBil
    elif filetype == "bsq":
        reader = readBsq
    else:
        raise ValueError("File type argument must be either 'bil' or 'bsq', got: " + str(filetype))

    return reader(filename, int(numlines), int(pixperline), int(numbands), dataformat)
# end function
104
# Function readyb
# Wrapper function for readBil/readBsq that allows you to omit the number of pixels per line
# (works it out from the file size)
#
# Arguments:
# filename: Name of file to read
# numlines: Number of lines of data in the file
# numbands: Number of bands in the file
# dataformat: Format string for data, as Python struct definition
# filetype: "bil" or "bsq" appropriately
#
# Returns: Whatever readBil/readBsq returns for the file (see those functions)
#
# Raises: ValueError if the format string is invalid, if the file size is not a whole number of
#   pixels per line for the given attributes, or if filetype is neither "bil" nor "bsq"
def readyb(filename, numlines, numbands, dataformat=defformat, filetype="bil"):
    filesize = os.stat(filename)[stat.ST_SIZE]

    # Check given format string is valid
    try:
        bytesperpix = struct.calcsize(dataformat)
    except (struct.error, TypeError):
        raise ValueError("Supplied data format " + str(dataformat) + " is invalid")

    # A zero dimension (or zero-size format) can never match a file, so report it the same way
    # as any other attribute mismatch rather than leaking a ZeroDivisionError
    try:
        pixperline = ((filesize / float(numbands)) / float(numlines)) / float(bytesperpix)
    except ZeroDivisionError:
        raise ValueError("File size and supplied attributes do not match")

    # The derived value is the one that must be integral: test pixperline, not the numlines
    # argument the caller passed in
    if pixperline != int(pixperline):
        raise ValueError("File size and supplied attributes do not match")

    if filetype == "bil":
        reader = readBil
    elif filetype == "bsq":
        reader = readBsq
    else:
        raise ValueError("File type argument must be either 'bil' or 'bsq', got: " + str(filetype))

    return reader(filename, int(numlines), int(pixperline), int(numbands), dataformat)
# end function
137
# Function readBil
# Reads a BIL file and returns a list containing the data from the file
#
# Arguments:
# filename: Name of file to read
# numlines: Number of lines of data in the file
# pixperline: Number of pixels on a line
# numbands: Number of bands in the file
# dataformat: Format string for data, as Python struct definition
#
# Returns: A list containing the data from filename formatted as a list of bands
#   containing a list of lines, each containing a list of pixel values
#
# Raises: ValueError if the file does not exist, the format string is invalid or the file size
#   does not equal numbands * numlines * pixperline * (bytes per pixel); EOFError if the file
#   runs out of data part-way through
def readBil(filename, numlines, pixperline, numbands, dataformat=defformat):

    # Check file exists and is a file
    if not os.path.isfile(filename):
        raise ValueError("Supplied filename " + str(filename) + " does not exist")

    # Check given format string is valid
    try:
        bytesperpix = struct.calcsize(dataformat)
    except (struct.error, TypeError):
        raise ValueError("Supplied data format " + str(dataformat) + " is invalid")

    # Check file size matches with size attributes (exact integer comparison; a zero dimension
    # can never describe a readable file so it is reported as a mismatch too)
    filesize = os.stat(filename)[stat.ST_SIZE]
    expectedsize = numbands * numlines * pixperline * bytesperpix
    if expectedsize <= 0 or expectedsize != filesize:
        raise ValueError("File size and supplied attributes do not match")

    # Open the file for reading in binary mode
    try:
        bilfile = open(filename, "rb")
    except (IOError, OSError):
        print("Failed to open BIL file " + filename)
        raise

    # One (initially empty) pixel list per line per band: bands[band][line][pixel]
    bands = [[[] for linenum in range(numlines)] for bandnum in range(numbands)]
    rowbytes = pixperline * bytesperpix

    try:
        # BIL format so have to cycle through lines at top level rather than bands
        for linenum in range(numlines):
            for bandnum in range(numbands):
                # Read one band's worth of pixels for this line in a single call
                rowdata = bilfile.read(rowbytes)

                # A short read (including an empty one) means we hit EOF early
                if len(rowdata) < rowbytes:
                    raise EOFError("Ran out of data to read before we should have")

                bands[bandnum][linenum] = [struct.unpack_from(dataformat, rowdata, offset)[0]
                                           for offset in range(0, rowbytes, bytesperpix)]
    finally:
        # Always release the file handle, whether or not the read succeeded
        bilfile.close()

    return bands
# end function
212
# Function readBilLine
# Reads a line of data from an open BIL file
#
# Arguments:
# bilfile: Open BIL file object (opened in binary mode, positioned at the start of a line)
# pixperline: Number of pixels on a line
# numbands: Number of bands in the file
# dataformat: Format string for data, as Python struct definition
#
# Returns: A 2D list with the band number in the first dimension and the pixel number in the
#   second, containing the data values for the line that was read
#
# Raises: EOFError if the file ends before a full line has been read
def readBilLine(bilfile, pixperline, numbands, dataformat=defformat):
    # Get the size in bytes for the given data format, and from it the size of one band's row
    itemsize = struct.calcsize(dataformat)
    rowbytes = pixperline * itemsize

    line = []

    # For each band, read all of its pixels for this line in one call and unpack them in order
    for bandnum in range(numbands):
        rowdata = bilfile.read(rowbytes)

        # A short read (including an empty one) means we hit EOF early
        if len(rowdata) < rowbytes:
            raise EOFError("Ran out of data to read before we should have")

        line.append([struct.unpack_from(dataformat, rowdata, offset)[0]
                     for offset in range(0, rowbytes, itemsize)])

    return line
# end function
246
# Function readBsq
# Reads a BSQ file and returns a list containing the data from the file
#
# Arguments:
# filename: Name of file to read
# numlines: Number of lines of data in the file
# pixperline: Number of pixels on a line
# numbands: Number of bands in the file
# dataformat: Format string for data, as Python struct definition
#
# Returns: A list containing the data from filename formatted as a list of bands
#   containing a list of lines, each containing a list of pixel values
#
# Raises: ValueError if the file does not exist, the format string is invalid or the file size
#   does not equal numbands * numlines * pixperline * (bytes per pixel); EOFError if the file
#   runs out of data part-way through
def readBsq(filename, numlines, pixperline, numbands, dataformat=defformat):

    # Check file exists and is a file
    if not os.path.isfile(filename):
        raise ValueError("Supplied filename " + str(filename) + " does not exist")

    # Check given format string is valid
    try:
        bytesperpix = struct.calcsize(dataformat)
    except (struct.error, TypeError):
        raise ValueError("Supplied data format " + str(dataformat) + " is invalid")

    # Check file size matches with size attributes (exact integer comparison; a zero dimension
    # can never describe a readable file so it is reported as a mismatch too)
    filesize = os.stat(filename)[stat.ST_SIZE]
    expectedsize = numbands * numlines * pixperline * bytesperpix
    if expectedsize <= 0 or expectedsize != filesize:
        raise ValueError("File size and supplied attributes do not match")

    # Open the file for reading in binary mode
    try:
        bsqfile = open(filename, "rb")
    except (IOError, OSError):
        print("Failed to open BSQ file " + filename)
        raise

    bands = []
    rowbytes = pixperline * bytesperpix

    try:
        # BSQ format so each band's lines are stored contiguously: read a band at a time
        for bandnum in range(numbands):
            banddata = []

            for linenum in range(numlines):
                # Read one line's worth of pixels for this band in a single call
                rowdata = bsqfile.read(rowbytes)

                # A short read (including an empty one) means we hit EOF early
                if len(rowdata) < rowbytes:
                    raise EOFError("Ran out of data to read before we should have")

                banddata.append([struct.unpack_from(dataformat, rowdata, offset)[0]
                                 for offset in range(0, rowbytes, bytesperpix)])

            bands.append(banddata)
    finally:
        # Always release the file handle, whether or not the read succeeded
        bsqfile.close()

    return bands
# end function
321
# Function readBsqBand
# Reads a band of data from an open BSQ file
#
# Arguments:
# bsqfile: Open BSQ file object (opened in binary mode, positioned at the start of a band)
# pixperline: Number of pixels on a line
# numlines: Number of lines in the file
# dataformat: Format string for data, as Python struct definition
#
# Returns: A 2D list with the line number in the first dimension and the pixel number in the
#   second, containing the data values for the band that was read
#
# Raises: EOFError if the file ends before a full band has been read
def readBsqBand(bsqfile, pixperline, numlines, dataformat=defformat):
    # Get the size in bytes for the given data format, and from it the size of one line
    itemsize = struct.calcsize(dataformat)
    rowbytes = pixperline * itemsize

    band = []

    # For each line in the band, read all of its pixels in one call and unpack them in order
    for linenum in range(numlines):
        rowdata = bsqfile.read(rowbytes)

        # A short read (including an empty one) means we hit EOF early
        if len(rowdata) < rowbytes:
            raise EOFError("Ran out of data to read before we should have")

        band.append([struct.unpack_from(dataformat, rowdata, offset)[0]
                     for offset in range(0, rowbytes, itemsize)])

    return band
# end function
355
# Function writeData
# Writes data to an output file straight from an input list
#
# Arguments:
# data: List containing data to be written
# datafile: Open (binary) data file to write to
# dataformat: Format string for data, as Python struct definition
#
# Raises: ValueError if the format string is invalid; IOError if an item cannot be packed or
#   written. NOTE: on either IOError the supplied datafile is closed before the error is raised.
def writeData(data, datafile, dataformat=defformat):

    # Check given format string is valid (the size is also used in the packing error message)
    try:
        bytesperpix = struct.calcsize(dataformat)
    except (struct.error, TypeError):
        raise ValueError("Supplied data format " + str(dataformat) + " is invalid")

    # Write data to file in order
    for dataitem in data:
        try:
            packeditem = struct.pack(dataformat, dataitem)
        except Exception:
            datafile.close()
            raise IOError("Could not pack " + str(dataitem) + " into " + str(bytesperpix) + " bytes. Reason: " + str(sys.exc_info()[1]))

        try:
            datafile.write(packeditem)
        except Exception:
            datafile.close()
            raise IOError("Failed to write to data file. Reason: " + str(sys.exc_info()[1]))
# end function
392
# Function writeDataFile
# Writes a data file (BIL or BSQ) from a 1D list containing data already in the right file order
# (ie data are written to the file in the order that they're in the list)
#
# Arguments:
# data: List containing data to write to the file
# filename: Name of file to be written to
# dataformat: Format string for data, as Python struct definition
# append: If True then appends the data to the end of the file rather than writing a new blank file. Default False
#
# Raises: ValueError if the format string is invalid (checked before the file is touched);
#   otherwise re-raises whatever error the open, pack or write step produced after printing
#   a short message
def writeDataFile(data, filename, dataformat=defformat, append=False):

    # Check given format string is valid BEFORE opening the file, so a bad format can neither
    # truncate an existing file nor leave an open handle behind
    try:
        bytesperpix = struct.calcsize(dataformat)
    except (struct.error, TypeError):
        raise ValueError("Supplied data format " + str(dataformat) + " is invalid")

    # Get correct mode string to open the file with
    if append:
        writeformat = "ab"
    else:
        writeformat = "wb"

    # Open the data file for writing in binary mode
    try:
        datafile = open(filename, writeformat)
    except (IOError, OSError):
        print("Could not open data file " + str(filename) + " for writing")
        raise

    try:
        # Pack each data item into binary data and write it to the output file
        for dataitem in data:
            try:
                packeditem = struct.pack(dataformat, dataitem)
            except Exception:
                print("Could not pack " + str(dataitem) + " into " + str(bytesperpix) + " bytes")
                raise

            try:
                datafile.write(packeditem)
            except Exception:
                print("Failed to write to data file. Reason: " + str(sys.exc_info()[1]))
                raise
    finally:
        # Close the file on success and on every failure path
        datafile.close()
# end function
447
# Function write3DData
# Writes a BIL or BSQ file from a 3D list with 1st dimension band number, 2nd dimension line number
# and 3rd dimension pixel number (unfolds to a 1D list in file order then calls writeDataFile)
#
# Arguments:
# data: List containing data to write to the file, indexed as data[band][line][pixel]
# filename: Name of file to be written to
# writehdr: Flag denoting whether to write an ENVI header file (default true)
# dataformat: Format string for data, as Python struct definition
# interleave: "bil" or "bsq" appropriately
#
# Raises: ValueError if interleave is not "bil"/"bsq", the format string is invalid, or any
#   dimension of data is 0. If the header is requested but the format has no ENVI type, the data
#   file is still written, a message is printed and no header is produced.
def write3DData(data, filename, writehdr=True, dataformat=defformat, interleave="bil"):

    # Validate the interleave once, up front, rather than once per pixel
    if interleave not in ("bil", "bsq"):
        raise ValueError("Interleave argument to write3DData must be either 'bil' or 'bsq', got: " + str(interleave))

    # Check given format string is valid
    try:
        struct.calcsize(dataformat)
    except (struct.error, TypeError):
        raise ValueError("Supplied data format " + str(dataformat) + " is invalid")

    # Store numbers of bands, lines and pixels per line, taking care not to index into an
    # empty dimension (dimensions are taken from the first band / first line)
    numbands = len(data)
    numlines = 0
    pixperline = 0
    if numbands > 0:
        numlines = len(data[0])
        if numlines > 0:
            pixperline = len(data[0][0])

    if numbands == 0 or numlines == 0 or pixperline == 0:
        raise ValueError("One or more dimensions of the data array were 0")

    # Unfold the 3D list into file order
    if interleave == "bil":
        # BIL: for each line, every band's pixels for that line in turn
        outdata = [data[bandnum][linenum][pixnum]
                   for linenum in range(numlines)
                   for bandnum in range(numbands)
                   for pixnum in range(pixperline)]
    else:
        # BSQ: each band's lines stored contiguously, one band after another
        outdata = [data[bandnum][linenum][pixnum]
                   for bandnum in range(numbands)
                   for linenum in range(numlines)
                   for pixnum in range(pixperline)]

    writeDataFile(outdata, filename, dataformat)

    # Write header file if requested
    if writehdr:
        try:
            # Check ENVI data type
            datatype = getEnviType(dataformat)
        except ValueError:
            # No ENVI equivalent: report it and skip the header (the data file is already written)
            print("Unable to generate header for type " + str(dataformat) + ", data type is not valid for ENVI")
            return

        writeHdrFile(filename + ".hdr", pixperline, numlines, numbands, datatype, interleave)
# end function
514
# Function writeHdrFile
# Writes an ENVI .hdr file to be associated with a data file
#
# Arguments:
# filename: Name of .hdr file to be written
# samples: Number of pixels per line (samples)
# lines: Number of lines
# bands: Number of bands
# datatype: Numeric code for relevant data type
# interleave: Interleave keyword to record in the header (default "bil")
# byteorder: Value to record in the "byte order" field (ENVI convention: 0 = least significant
#   byte first, 1 = most significant byte first). Defaults to 0, the value always written before
#   this argument existed
#
# Raises: Re-raises the error from open() after printing a message if the file cannot be opened
def writeHdrFile(filename, samples, lines, bands, datatype, interleave="bil", byteorder=0):
    try:
        hdrfile = open(filename, "w")
    except (IOError, OSError):
        print("Could not open header file " + str(filename) + " for writing")
        raise

    hdrlines = [
        "ENVI\n",
        "description = { Created by bil_handler.py }\n",
        "samples = " + str(samples) + "\n",
        "lines   = " + str(lines) + "\n",
        "bands   = " + str(bands) + "\n",
        "header offset = 0\n",
        "file type = ENVI Standard\n",
        "data type = " + str(datatype) + "\n",
        "interleave = " + str(interleave) + "\n",
        "byte order = " + str(byteorder) + "\n",
    ]

    try:
        hdrfile.write("".join(hdrlines))
    finally:
        # Close (and thereby flush) the header even if the write fails
        hdrfile.close()
# end function
546
# Function readHdrFile
# Reads data from a given ENVI-style header file
#
# Arguments
# hdrfilename: Name of header file to be read
#
# Returns: Dictionary containing keys/values from header file. Keys and values are the
#   whitespace-stripped strings either side of the first "=" on a line; lines without "=" (outside
#   a block) are ignored. A value bracketed by curly braces may span several lines: the braces are
#   removed and the stripped lines are concatenated with no separator.
#
# Raises: Re-raises the error from open() after printing a message if the file cannot be opened
def readHdrFile(hdrfilename):
    output = {}
    inblock = False  # True while inside a multi-line { ... } block
    key = None       # Key that continuation lines of the current block are appended to

    try:
        hdrfile = open(hdrfilename, "r")
    except (IOError, OSError):
        print("Could not open hdr file '" + str(hdrfilename) + "'")
        raise

    try:
        for currentline in hdrfile:
            if inblock:
                # Continuation line of a block: strip whitespace (and any closing brace ending
                # the block) and append the whole thing to the value being built
                value = currentline.strip()
                if value.endswith("}"):
                    inblock = False
                    value = value[:-1].strip()
                output[key] = output[key] + value
            elif "=" in currentline:
                # Split line on first equals sign only, since values may themselves contain "="
                linesplit = currentline.split("=", 1)
                key = linesplit[0].strip()
                value = linesplit[1].strip()

                # If value starts with an open brace, it's the start of a block - strip the brace off
                if value.startswith("{"):
                    inblock = True
                    value = value[1:]

                    # If value ends with a close brace it's the end of the block as well
                    if value.endswith("}"):
                        inblock = False
                        value = value[:-1]

                output[key] = value.strip()
    finally:
        # Close the header whether or not parsing succeeded
        hdrfile.close()

    return output
# end function
608
# Function getEnviType
# Gets the ENVI type code equivalent to a particular Python struct format string
#
# Arguments
# formatstr: Struct format string to get ENVI type code for (single format character)
#
# Returns: ENVI numeric type code for supplied format string
#
# Raises: ValueError if formatstr is not a valid struct format, or is valid but has no ENVI
#   equivalent in the table below
def getEnviType(formatstr):

    # Check the given format string is valid
    try:
        struct.calcsize(formatstr)
    except (struct.error, TypeError):
        raise ValueError(str(formatstr) + " is not a valid format string")

    envitypes = {
        "b": 1,   # Byte. NOTE(review): ENVI documents type 1 as unsigned; "b" kept for existing callers
        "B": 1,   # Unsigned byte
        "h": 2,   # 2-byte signed short int (ENVI calls it an int)
        "H": 12,  # 2-byte unsigned int (ENVI calls it an int)
        "i": 3,   # 4-byte signed int (ENVI calls it a Long)
        "I": 13,  # 4-byte unsigned int (ENVI calls it a Long)
        "f": 4,   # 4-byte float
        "d": 5,   # 8-byte double precision
        "l": 14,  # long int (ENVI 64-bit int) - only 8 bytes where the native C long is 8 bytes
        "L": 15,  # unsigned long int (ENVI 64-bit int) - same platform caveat as "l"
        "q": 14,  # 8-byte long long (ENVI 64-bit int)
        "Q": 15,  # 8-byte unsigned long long (ENVI 64-bit int)
    }

    if formatstr not in envitypes:
        # If we get here then the format string is valid for Python but not for ENVI, raise an error
        raise ValueError(str(formatstr) + " is a valid Python format string but does not have an ENVI equivalent")

    return envitypes[formatstr]
# end function
653
# Function getStructType
# Gets the Python struct format string equivalent to a particular ENVI type code
#
# Arguments
# typecode: ENVI type code to get Python format string for (anything int() accepts, so the
#   string value read from a header file can be passed directly)
#
# Returns: Single-character Python struct format string
#
# Raises: ValueError if typecode cannot be converted to an integer or has no entry in the
#   table below
def getStructType(typecode):

    try:
        inttype = int(typecode)
    except (TypeError, ValueError):
        raise ValueError(str(typecode) + " is not a valid ENVI type for conversion")

    structtypes = {
        1: "b",   # Byte. NOTE(review): ENVI documents type 1 as unsigned ("B"); "b" kept so
                  # values round-trip through getEnviType as before
        2: "h",   # 2-byte signed short int (ENVI calls it an int)
        12: "H",  # 2-byte unsigned int (ENVI calls it an int)
        3: "i",   # 4-byte signed int (ENVI calls it a Long)
        13: "I",  # 4-byte unsigned int (ENVI calls it a Long)
        4: "f",   # 4-byte float
        5: "d",   # 8-byte double precision
        14: "l",  # long int (ENVI 64-bit int) - only 8 bytes where the native C long is 8 bytes
        15: "L",  # unsigned long int (ENVI 64-bit int) - same platform caveat as "l"
    }

    if inttype not in structtypes:
        # If we get here then the type code doesn't have a Python equivalent, raise an error
        raise ValueError(str(typecode) + " does not have an equivalent Python format string")

    return structtypes[inttype]
# end function