condensed code
# attempt to condense code while preserving the parts
# relevant to the question
from xml.sax import handler, make_parser
class pdosHandler(handler.ContentHandler):
    """SAX content handler that collects the numbers inside <energy_values>.

    The parsed floats are appended to ``data.energy_values``.

    A SAX parser is free to deliver the text of one element through several
    ``characters()`` calls, split at arbitrary positions (for example at an
    internal read-buffer boundary).  A single chunk can therefore be empty
    after stripping, or can contain only part of a number.  To be robust
    against that, the text is buffered while the element is open and is
    converted to floats only once the closing tag has been seen.
    """

    def __init__(self, data):
        """Store the target object.

        data -- object exposing a list-like ``energy_values`` attribute that
                receives the parsed floats.
        """
        handler.ContentHandler.__init__(self)
        self.data = data
        # Maps an element name to 1 while the parser is inside that element
        # and to 0 otherwise.
        self.parts = {'energy_values': 0}
        # Kept for backward compatibility; results go to data.energy_values.
        self.energy_values = []
        # Raw text chunks received for the currently open <energy_values>.
        self._energy_chunks = []

    def startDocument(self):
        """Announce that parsing has begun."""
        print("Reading started")

    def startElement(self, name, attrs):
        """Flag a tracked element as open and start a fresh text buffer."""
        if name in self.parts:
            self.parts[name] = 1
            if name == 'energy_values':
                self._energy_chunks = []

    def characters(self, ch):
        """Buffer text belonging to <energy_values>; never parse it here."""
        if self.parts['energy_values']:
            self._energy_chunks.append(ch)

    def endElement(self, name):
        """Convert the buffered text to floats and flag the element closed.

        Raises ValueError if a whitespace-separated token is not a number.
        """
        if name not in self.parts:
            return
        if name == 'energy_values' and self.parts[name]:
            text = ''.join(self._energy_chunks)
            # split() without arguments drops blank lines and padding.
            self.data.energy_values.extend(
                float(token) for token in text.split()
            )
            self._energy_chunks = []
        # Clear the flag so text of later elements is not parsed as numbers.
        self.parts[name] = 0
def pdosreader(inp, data):
    """Parse the PDOS XML file at path *inp*, filling *data*.

    inp  -- path of the XML input file.
    data -- object handed to pdosHandler, which stores the parsed values
            on it.

    Exceptions raised while parsing propagate to the caller; the input
    file is closed in every case.
    """
    # Not named "handler": that would shadow the imported xml.sax.handler
    # module inside this function.
    content_handler = pdosHandler(data)
    parser = make_parser()
    parser.setContentHandler(content_handler)
    # The with-block closes the file even when parse() raises.
    with open(inp) as in_file:
        parser.parse(in_file)
line 153-155:
if( self.parts['energy_values'] ):
if( ch != '\n' ):
self.data.energy_values.append( string.atof(normalize_whitespace( ch ) ) )
error:
Traceback (most recent call last):
File "siesta_pdos.py", line 286, in <module>
main()
File "siesta_pdos.py", line 278, in main
pdosreader( args[0], data )
File "siesta_pdos.py", line 262, in pdosreader
parser.parse( inFile )
File "/usr/lib/python2.7/xml/sax/expatreader.py", line 107, in parse
xmlreader.IncrementalParser.parse(self, source)
File "/usr/lib/python2.7/xml/sax/xmlreader.py", line 123, in parse
self.feed(buffer)
File "/usr/lib/python2.7/xml/sax/expatreader.py", line 207, in feed
self._parser.Parse(data, isFinal)
File "siesta_pdos.py", line 155, in characters
self.data.energy_values.append( string.atof(normalize_whitespace( ch ) ) )
File "/usr/lib/python2.7/string.py", line 388, in atof
return _float(s)
ValueError: could not convert string to float:
inputfile:
<pdos>
<nspin>2</nspin>
<norbitals>7748</norbitals>
<energy_values>
-29.99997
-29.98997
-29.97996
...
... (3494 lines skipped)
...
4.97999
4.98999
4.99999
</energy_values>
</pdos>
full input at: http://dl.dropbox.com/u/10405722/inputfile.dat
full code at: http://dl.dropbox.com/u/10405722/siesta_pdos.py
The code correctly reads the first 3116 values and then exits with the error. Note that the same code works fine with a shorter input (e.g. 3000 lines). It therefore looks to me like a buffer-related error that has nothing to do with atof.
Any idea?
The documentation says that string.atof is deprecated since version 2.0 and that the built-in float() should be used instead.
You claim that
float()
doesn't work, which probably means that your input is invalid. It is very easy to use print
when finding out why something doesn't work as you expect.
Because you had to explain normalize_whitespace, that means it is a bad synonym; if you just called it strip, every reader would know what it did without having to look it up.
In case you don't know
repr
is intended to reduce ambiguity. For example, given x = '1', `print x` shows 1 while `print repr(x)` shows '1': with the first print, it is unclear whether x is numeric or a string. With repr, there is no guessing involved.