00001
00002
00003
00004 """
00005
00006 ScrumPy -- Metabolic Modelling with Python
00007
00008 Copyright Mark Poolman 1995 - 2002
00009
00010 This file is part of ScrumPy.
00011
00012 ScrumPy is free software; you can redistribute it and/or modify
00013 it under the terms of the GNU General Public License as published by
00014 the Free Software Foundation; either version 2 of the License, or
00015 (at your option) any later version.
00016
00017 ScrumPy is distributed in the hope that it will be useful,
00018 but WITHOUT ANY WARRANTY; without even the implied warranty of
00019 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00020 GNU General Public License for more details.
00021
00022 You should have received a copy of the GNU General Public License
00023 along with ScrumPy; if not, write to the Free Software
00024 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00025
00026 """
00027
00028 import sys,types, math
00029
00030
00031 from Util import DynMatrix, Seq, Set
00032
00033 import Stats, Histo, Plotter
00034
00035
00036
00037
00038
class DataSet(DynMatrix.matrix):
    """ A DynMatrix.matrix with row names (self.rnames) and column names
        (self.cnames), extended with descriptive statistics (Stats),
        histograms (Histo) and plotting (Plotter).

        The statistics methods accept either a row or a column name: the
        name is first tried as a row (self[Name]) and, if that fails, as a
        column (self.GetCol(Name)). """

    def __init__(self, FromFile=None, ItemNames=None, SrcDic=None, FromMtx=None, Conv=float):
        """ pre: No more than one non-default argument (SrcDic may be combined with ItemNames)
                 (FromFile != None) -> (File conforms to FileSpec)
           post: Usable

           FromMtx   : matrix to initialise from (handed to DynMatrix.matrix)
           FromFile  : handed to self.ReadFile()
           SrcDic    : dictionary read by Update(); ItemNames become the column names
           ItemNames : column names
           Conv      : handed to DynMatrix.matrix as Conv

           The arguments are examined in the order FromMtx, FromFile, SrcDic,
           ItemNames and the first non-default one decides how self is built. """

        # None stands in for "no names" so that no mutable default list can
        # ever be shared between instances as their column name list.
        if ItemNames is None:
            ItemNames = []

        # identity tests: "!= None" could invoke the matrix's own comparison
        if FromMtx is not None:
            DynMatrix.matrix.__init__(self, Conv=Conv, FromMtx=FromMtx)
        elif FromFile is not None:
            DynMatrix.matrix.__init__(self, Conv=Conv)
            self.ReadFile(FromFile)
        elif SrcDic is not None:
            DynMatrix.matrix.__init__(self, cnames=ItemNames, Conv=Conv)
            self.SrcDic = SrcDic
        elif ItemNames != []:
            DynMatrix.matrix.__init__(self, Conv=Conv, cnames=ItemNames)
        else:
            DynMatrix.matrix.__init__(self, Conv=Conv)

        self.Plotter = Plotter.Plotter()
        self.Plotter.SetMissing()  # NOTE(review): presumably selects the default missing-value marker - confirm in Plotter
        self.PlotX = 0             # column (name or index) used as x axis when plotting

    def Copy(self):
        """ pre: True
           post: returns a new DataSet initialised from self (FromMtx=self) """

        return DataSet(FromMtx=self)

    def Subset(self, names):
        """ pre: every item of names is accepted by self.GetCol()
           post: returns a new DataSet holding only the columns in names, in that order,
                 with a copy of the row names of self """

        rv = DataSet()
        for name in names:
            rv.NewCol(self.GetCol(name), name)
        rv.rnames = self.rnames[:]
        return rv

    def WriteFile(self, File, InOrder=None, *args, **kwargs):
        """ pre: (InOrder == None) || (InOrder is acceptable to self.RowReorder())
           post: File written by DynMatrix.matrix.WriteFile(File, *args, **kwargs)
                 (InOrder != None) => rows written in InOrder, and the previous
                                      row order of self restored afterwards,
                                      even if writing fails """

        if InOrder is None:
            DynMatrix.matrix.WriteFile(self, File, *args, **kwargs)
            return

        OldOrder = self.rnames[:]
        self.RowReorder(InOrder)
        try:
            # call the base class directly: going back through self.WriteFile
            # would bind the first extra positional argument to InOrder
            DynMatrix.matrix.WriteFile(self, File, *args, **kwargs)
        finally:
            self.RowReorder(OldOrder)

    def WriteFileAsColPairs(self, File, ref):
        """ pre: File is a file name or an object with a write() method
                 ref is a string in self.cnames
           post: for every column other than ref, File holds a "#<ref> <column>" line,
                 one "<ref value><TAB><column value>" line per row, then an empty line
                 a file opened here (File given as a name) is closed before returning """

        opened_here = isinstance(File, str)
        if opened_here:
            File = open(File, "w")

        try:
            refc = self.GetCol(ref)
            for cname in self.cnames:
                if cname != ref:
                    File.write("#" + ref + " " + cname + "\n")
                    col = self.GetCol(cname)
                    for i in range(len(refc)):
                        File.write(str(refc[i]) + "\t" + str(col[i]) + "\n")
                    File.write("\n")
        finally:
            # only close what was opened here; a caller-supplied file stays open
            if opened_here:
                File.close()

    def Update(self, label=None):
        """ pre: self was created with SrcDic, and SrcDic has a value for every name in self.cnames
           post: self has a new row called label (default "Row_<len(self) before the call>")
                 holding the current values of SrcDic
                 pre not met => warning on terminal; the new row is still present,
                                values not copied before the failure remain 0 """

        if label is None:
            label = "Row_" + str(len(self))
        self.NewRow([0] * len(self.cnames), label)
        try:
            for k in self.cnames:
                self[label, k] = self.SrcDic[k]
        except Exception:
            # deliberately best-effort: report and carry on
            print("\n"
                  "!!! DataSet.Update failed - either not created with source dictionary\n"
                  "!!! or source dictionary no longer has a key for every column\n")

    def UpdateFromDic(self, dic, name=None):
        """ pre: dic is a dictionary
           post: self has a new last row holding the values of dic, named name if name != None
                 keys of dic that were not in self.cnames have become new columns """

        self.NewRow()
        if name is not None:
            self.rnames[-1] = name
        for k in dic:
            if k not in self.cnames:
                self.NewCol(name=k)
            self[-1, k] = dic[k]

    def _get(self, Name):
        """ return row Name if self[Name] succeeds, otherwise column Name """

        try:
            return self[Name]
        except Exception:
            return self.GetCol(Name)

    def _Ranks(self, values):
        """ pre: items of values are mutually comparable
           post: returns a list, parallel to values, of 1-based ranks;
                 equal values share the mean of the ranks they occupy """

        ordered = [(values[i], i) for i in range(len(values))]
        ordered.sort()
        ranks = [0.0] * len(ordered)

        start = 0
        while start < len(ordered):
            # [start, end] is a run of equal values
            end = start
            while end + 1 < len(ordered) and ordered[end + 1][0] == ordered[start][0]:
                end += 1
            mean_rank = (start + end) / 2.0 + 1.0
            for pos in range(start, end + 1):
                ranks[ordered[pos][1]] = mean_rank
            start = end + 1

        return ranks

    def Sum(self, Name):
        """ pre: len(self[Name]) > 0
           post: self.Sum(Name) == Arithmetic sum of self[Name] """

        return sum(self._get(Name))

    def Mean(self, Name):
        """ pre: len(self[Name]) > 0
           post: self.Mean(Name) == arithmetic mean of self[Name] """

        return Stats.Mean(self._get(Name))

    def Variance(self, Name):
        """ pre: len(self[Name]) > 1
           post: self.Variance(Name) == variance of self[Name] """

        return Stats.Var(self._get(Name))

    def StdDev(self, Name):
        """ pre: len(self[Name]) > 1
           post: self.StdDev(Name) == standard deviation of self[Name] """

        return Stats.StdDev(self._get(Name))

    def StdErr(self, Name):
        """ pre: len(self[Name]) > 1
           post: self.StdErr(Name) == standard error of the mean of self[Name] """

        return self.StdDev(Name) / math.sqrt(len(self._get(Name)))

    def Lower_n_ile(self, Name, n):
        """ pre: len(self[Name]) > 0, 0 < n <= 1
           post: self.Lower_n_ile(Name, n) == value of self[Name] which is the limit of the
                 lower nth portion of self[Name] """

        return Stats.Lower_n_ile(self._get(Name), n)

    def Upper_n_ile(self, Name, n):
        """ pre: len(self[Name]) > 0, 0 < n <= 1
           post: self.Upper_n_ile(Name, n) == value of self[Name] which is the limit of the
                 upper nth portion of self[Name] """

        return Stats.Upper_n_ile(self._get(Name), n)

    def Median(self, Name):
        """ pre: len(self[Name]) > 0
           post: self.Median(Name) == Median value of self[Name] """

        return Stats.Median(self._get(Name))

    def TTest(self, Name_a, Name_b):
        """ Student's (two tailed) t and p(t) """

        return Stats.TTest(self._get(Name_a), self._get(Name_b))

    def FTest(self, Name_a, Name_b):
        """ F and p(F) """

        return Stats.FTest(self._get(Name_a), self._get(Name_b))

    def Pearsons(self, ref, targs=()):
        """ pre: ref and targs in self.cnames
           post: dictionary of {targ: Pearson's r(ref, targ)} """

        refcol = self.GetCol(ref)
        rv = {}
        for t in targs:
            rv[t] = Stats.Pearson_r(refcol, self.GetCol(t))

        return rv

    def Spearmans(self, ref, targs=()):
        """ pre: ref and targs in self.cnames
           post: dictionary of {targ: Spearman's rank correlation(ref, targ)},
                 computed as Pearson's r of the ranked columns (ties get their mean rank) """

        refranks = self._Ranks(self.GetCol(ref))
        rv = {}
        for t in targs:
            rv[t] = Stats.Pearson_r(refranks, self._Ranks(self.GetCol(t)))

        return rv

    def Deriv(self, wrt):
        """ pre: wrt in self.cnames or wrt in self.rnames
           post: returns a new DataSet in which every column (row) other than wrt holds
                 Seq.Deriv() of that column (row) with respect to wrt, evaluated over
                 successive windows of three points, so the first and last points are lost;
                 wrt itself holds its own values without the first and last
                 a name found in self.cnames is treated as a column
                 self is unchanged (it is transposed and restored while working on a column) """

        if wrt in self.cnames:
            self.Transpose()
            try:
                rv = self._DerivByRow(wrt)
            finally:
                # always undo the temporary transposition of self
                self.Transpose()
            rv.Transpose()
            return rv

        return self._DerivByRow(wrt)

    def _DerivByRow(self, wrt):
        """ derivative of every row with respect to row wrt - see Deriv() """

        rv = DataSet()

        x = self[wrt]
        interior = range(1, len(x) - 1)   # points having a neighbour on both sides
        for rname in self.rnames:
            if rname != wrt:
                y = self[rname]
                retrow = [Seq.Deriv(x[n - 1:n + 2], y[n - 1:n + 2]) for n in interior]
            else:
                retrow = x[1:-1]
            rv.NewRow(retrow, rname)
        rv.cnames = self.cnames[1:-1]

        return rv

    def SetPlotX(self, x):
        """ pre: True
           post: if self has a column of name or index x, this will be used as x axis for plotting
                 else warning message on terminal """

        if x in self.cnames:
            self.PlotX = x
        elif isinstance(x, int) and 0 <= x < len(self.cnames):
            # column index, like the initial PlotX of 0
            self.PlotX = x
        else:
            print("!!  %s not found - ignoring !!" % (x,))

    def AddToPlot(self, ys, style="linespoints", AutoPlot=True):
        """ pre: ys is a string or list of strings
                 style is a valid gnuplot style
           post: items of ys present in self.cnames are added to the current plot,
                 warning on terminal (stderr) for each of the others
                 AutoPlot => plot is updated immediately (applies to all other methods
                             with an AutoPlot parameter) """

        if not isinstance(ys, list):
            ys = [ys]

        for y in ys:
            if y in self.cnames:
                self.Plotter.AddData(y, [self.GetCol(self.PlotX), self.GetCol(y)], style)
            else:
                sys.stderr.write("! ignoring attempt to plot non-existent value  %s  !\n" % (y,))

        if AutoPlot:
            self.Plot()

    def AddMatchesToPlot(self, substr, style="linespoints", AutoPlot=True):
        """ pre: substr is a string
                 style is a valid gnuplot style
           post: item names containing substr are added to the current plot """

        matches = [col for col in self.cnames if substr in col]
        self.AddToPlot(matches, style, AutoPlot)

    def RemoveFromPlot(self, ys, AutoPlot=True):
        """ pre: ys is a string or list of strings
           post: ys is/are not in the current plot """

        if not isinstance(ys, list):
            ys = [ys]

        for y in ys:
            self.Plotter.RemoveData(y)

        if AutoPlot:
            self.Plot()

    def RemoveMatchesFromPlot(self, substr, AutoPlot=True):
        """ pre: substr is a string
           post: items whose name contains substr are removed from the plot """

        matches = [key for key in self.Plotter.keys() if substr in key]
        self.RemoveFromPlot(matches, AutoPlot)

    def GetPlotItems(self):
        """ return a list of names of data currently being plotted """

        return self.Plotter.keys()

    def GetPlotData(self, x, y):
        """ return a Plotter.Plotdata object, corresponding to col x and y,
            useful for combining data from more than one data set in a single plot.
            x == None selects the current plot x column (see SetPlotX).
            see the Data.Plotter module """

        if x is None:
            x = self.PlotX
        return Plotter.Plotdata([self.GetCol(x), self.GetCol(y)])

    def SavePlot(self, filename):
        """ save the current plot in gnuplot readable format to filename """

        self.Plotter.SavePlot(filename)

    def RemoveAllFromPlot(self):
        """ remove all items from the plot """

        # snapshot of the keys: items are removed while we go through them
        self.RemoveFromPlot(list(self.Plotter.keys()))

    def Plot(self):
        """ update the current plot window or plot in new one if not present """

        x = self.GetCol(self.PlotX)
        for k in self.Plotter.keys():
            self.Plotter[k].UpdateData([x, self.GetCol(k)])

        self.Plotter.Plot()

    def Filter(self, Targ, fun):
        """ pre: Targ in self.rnames or Targ in self.cnames
                 fun(value) returns true for values to be kept
           post: returns a copy of self without the rows (columns, if Targ is a row name)
                 whose value in Targ does not satisfy fun
                 a name found in self.rnames is treated as a row
                 self is unchanged (it is transposed and restored while working on a row) """

        if Targ in self.rnames:
            self.Transpose()
            try:
                rv = self._FilterByCol(Targ, fun)
            finally:
                # always undo the temporary transposition of self
                self.Transpose()
            rv.Transpose()
            return rv

        return self._FilterByCol(Targ, fun)

    def _FilterByCol(self, Targ, fun):
        """ copy of self without the rows rejected by fun in column Targ - see Filter() """

        rv = self.Copy()
        c = self.cnames.index(Targ)
        for r in self.rnames:
            if not fun(self[r, c]):
                rv.DelRow(r)
        return rv

    def Histogram(self, name, nbins, **kwargs):
        """ pre: name is a column or row name
           post: returns Histo.Histo of column name if name in self.cnames,
                 else of row name; nbins and kwargs are handed to Histo.Histo """

        if name in self.cnames:
            return Histo.Histo(self.GetCol(name), nbins, **kwargs)
        else:
            return Histo.Histo(self[name], nbins, **kwargs)

    def HistogramAll(self, nbins, **kwargs):
        """ pre: True
           post: returns Histo.Histo of all values in self (rows concatenated) """

        data = []
        for r in self.rows:
            data.extend(r)
        return Histo.Histo(data, nbins, **kwargs)

    def HistogramUD(self, nbins, Diag=True, **kwargs):
        """ pre: self is square
           post: returns Histo.Histo of the values in the upper triangle of self,
                 including the diagonal if Diag, excluding it otherwise """

        data = []
        if Diag:
            for i in range(len(self)):
                data.extend(self[i][i:])
        else:
            for i in range(len(self) - 1):
                data.extend(self[i][i + 1:])

        return Histo.Histo(data, nbins, **kwargs)
00421
00422
00423
00424
00425
00426
00427
00428