1919def loadData (filename , minrows = 10 , headers = False , hdel = '=' , hignore = None , ** kwargs ):
2020 """Find and load data from a text file.
2121
22- The data block is identified as the first matrix block of at least minrows rows
23- and constant number of columns. This seems to work for most of the datafiles including
24- those generated by diffpy programs.
22+ The data block is identified as the first matrix block of at least minrows rows and a constant number of columns.
23+ This seems to work for most of the datafiles including those generated by diffpy programs.
2524
26- filename -- name of the file we want to load data from.
27- minrows -- minimum number of rows in the first data block.
28- All rows must have the same number of floating point values.
29- headers -- when False (defualt), the function returns a numpy array of the
30- data in the data block. When True, the function instead returns a
31- dictionary of parameters and their corresponding values parsed from
32- header (information prior the data block). See hdel and hignore for
33- options to help with parsing header information.
34- hdel -- (only used when headers enabled) delimiter for parsing header
35- information (default '='). e.g. using default hdel, the line
36- 'parameter = p_value' is put into the dictionary as
37- {parameter: p_value}.
38- hignore -- (only used when headers enabled) ignore header rows beginning
39- with any elements in the hignore list. e.g. hignore=['# ', '[']
40- means the following lines are skipped: '# qmax=10', '[defaults]'.
41- kwargs -- keyword arguments that are passed to numpy.loadtxt including
42- the following arguments below. (See also numpy.loadtxt for more
43- details.)
44- delimiter -- delimiter for the data in the block (default use whitespace).
45- For comma-separated data blocks, set delimiter to ','.
46- usecols -- zero-based index of columns to be loaded, by default use
47- all detected columns. The reading skips data blocks that
48- do not have the usecols-specified columns.
49- unpack -- return data as a sequence of columns that allows tuple
50- unpacking such as x, y = loadData(FILENAME, unpack=True).
51- Note transposing the loaded array as loadData(FILENAME).T
52- has the same effect.
25+ Parameters
26+ ----------
27+ filename
28+ Name of the file we want to load data from.
29+ minrows: int
30+ Minimum number of rows in the first data block. All rows must have the same number of floating point values.
31+ headers: bool
32+ When False (default), the function returns a numpy array of the data in the data block.
33+ When True, the function instead returns a dictionary of parameters and their corresponding values parsed from
34+ the header (information prior to the data block). See hdel and hignore for options to help with parsing header
35+ information.
36+ hdel: str
37+ (Only used when headers enabled.) Delimiter for parsing header information (default '='). e.g. using
38+ default hdel, the line 'parameter = p_value' is put into the dictionary as {parameter: p_value}.
39+ hignore: list
40+ (Only used when headers enabled.) Ignore header rows beginning with any elements in hignore.
41+ e.g. hignore=['# ', '['] causes the following lines to be skipped: '# qmax=10', '[defaults]'.
42+ kwargs:
43+ Keyword arguments that are passed to numpy.loadtxt, including the useful kwargs listed below. (See
44+ numpy.loadtxt for more details.) Only pass kwargs used by numpy.loadtxt.
5345
54- Return a numpy array of the data (data_block). If headers enabled, instead returns a
55- dictionary of parameters read from the header (hddata).
46+ Useful kwargs
47+ =============
48+ delimiter: str
49+ Delimiter for the data in the block (default use whitespace). For comma-separated data blocks,
50+ set delimiter to ','.
51+ usecols:
52+ Zero-based index of columns to be loaded, by default use all detected columns. The reading skips
53+ data blocks that do not have the usecols-specified columns.
54+ unpack: bool
55+ Return data as a sequence of columns that allows tuple unpacking such as x, y =
56+ loadData(FILENAME, unpack=True). Note transposing the loaded array as loadData(FILENAME).T has the same
57+ effect.
58+
59+ Returns
60+ -------
61+ data_block: ndarray
62+ A numpy array containing the found data block. (This is not returned if headers is enabled.)
63+ hdata: dict
64+ If headers are enabled, return a dictionary of parameters read from the header.
5665 """
5766 from numpy import array , loadtxt
5867 # for storing header data
@@ -156,18 +165,24 @@ def countcolumnsvalues(line):
156165
157166
158167class TextDataLoader (object ):
159- '''Smart loading of a text data with possibly multiple datasets.
160- '''
168+ """Smart loading of text data with possibly multiple datasets.
161169
162- minrows = 10
163- usecols = None
164- skiprows = None
170+ Parameters
171+ ----------
172+ minrows: int
173+ Minimum number of rows in the first data block. (Default 10.)
174+ usecols: tuple
175+ Which columns in our dataset to use. Ignores all other columns. If None (default), use all columns.
176+ skiprows
177+ Rows in dataset to skip. (Currently not functional.)
178+ """
165179
166- def __init__ (self , minrows = None , usecols = None , skiprows = None ):
180+ def __init__ (self , minrows = 10 , usecols = None , skiprows = None ):
167181 if minrows is not None :
168182 self .minrows = minrows
169183 if usecols is not None :
170184 self .usecols = tuple (usecols )
185+ # FIXME: implement usage in _findDataBlocks
171186 if skiprows is not None :
172187 self .skiprows = skiprows
173188 # data items
@@ -194,12 +209,23 @@ def _resetvars(self):
194209
195210
196211 def read (self , filename ):
212+ """Open a file and run readfp.
213+
214+ Use this when the file is not already open in binary read mode.
215+ """
197216 with open (filename , 'rb' ) as fp :
198217 self .readfp (fp )
199218 return
200219
201220
202221 def readfp (self , fp , append = False ):
222+ """Get file details.
223+
224+ File details include:
225+ * File name.
226+ * All data blocks findable by loadData.
227+ * Headers (if present) for each data block. (Generally the headers contain column name information).
228+ """
203229 self ._reset ()
204230 # try to read lines from fp first
205231 self ._lines = fp .readlines ()
0 commit comments