• Main Page
  • Namespaces
  • Classes
  • Files
  • File List

/home/mark/model/software/ScrumPy/ScrumPy/Data/DataSets.py

00001 
00002 
00003 
00004 """
00005 
00006 ScrumPy -- Metabolic Modelling with Python
00007 
00008 Copyright Mark Poolman 1995 - 2002
00009 
00010  This file is part of ScrumPy.
00011 
00012     ScrumPy is free software; you can redistribute it and/or modify
00013     it under the terms of the GNU General Public License as published by
00014     the Free Software Foundation; either version 2 of the License, or
00015     (at your option) any later version.
00016 
00017     ScrumPy is distributed in the hope that it will be useful,
00018     but WITHOUT ANY WARRANTY; without even the implied warranty of
00019     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00020     GNU General Public License for more details.
00021 
00022     You should have received a copy of the GNU General Public License
00023     along with ScrumPy; if not, write to the Free Software
00024     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00025 
00026 """
00027 
00028 import sys,types,  math
00029 
00030 
00031 from Util  import DynMatrix, Seq, Set
00032 
00033 import Stats, Histo,  Plotter
00034 
00035 
00036 
00037 
00038 
class DataSet(DynMatrix.matrix):
    """ A DynMatrix.matrix with named rows and columns, extended with
        descriptive statistics, simple numerical differentiation, filtering,
        histogram generation and gnuplot-style plotting via Plotter.Plotter.

        Most statistical methods take a Name which is looked up as a row first
        and, if that fails, as a column (see _get). """

    def __init__(self, FromFile=None, ItemNames=None, SrcDic=None, FromMtx=None, Conv=float):
        """ pre: No more than one non-default argument (apart from Conv; ItemNames
                 may accompany SrcDic)
                 (FromFile != None) -> (FromFile can be read by self.ReadFile)
           post: Usable

            FromFile  : file to load via self.ReadFile
            ItemNames : list of column names for an initially empty data set
            SrcDic    : dictionary sampled by Update(), keyed by column name
            FromMtx   : existing matrix to initialise from (see Copy)
            Conv      : passed to DynMatrix.matrix.__init__ as Conv

            If several arguments are given, precedence is
            FromMtx, FromFile, SrcDic, ItemNames. """

        # A fresh list per call: the original "ItemNames=[]" default was a single
        # list object handed to the base class as cnames by every instance.
        if ItemNames is None:
            ItemNames = []

        # "is not None" avoids invoking any comparison operators defined on matrices.
        if FromMtx is not None:
            DynMatrix.matrix.__init__(self, Conv=Conv, FromMtx=FromMtx)
        elif FromFile is not None:
            DynMatrix.matrix.__init__(self, Conv=Conv)
            self.ReadFile(FromFile)
        elif SrcDic is not None:
            DynMatrix.matrix.__init__(self, cnames=ItemNames, Conv=Conv)
            self.SrcDic = SrcDic
        elif ItemNames:
            DynMatrix.matrix.__init__(self, Conv=Conv, cnames=ItemNames)
        else:
            DynMatrix.matrix.__init__(self, Conv=Conv)

        self.Plotter = Plotter.Plotter()
        self.Plotter.SetMissing()
        self.PlotX = 0      # column (index or name) used as the x axis when plotting

    def Copy(self):
        """ pre: True
           post: returns a new DataSet initialised from self (FromMtx=self) """

        return DataSet(FromMtx=self)

    def Subset(self, names):
        """ pre: all items in names are column names of self
           post: returns a new DataSet holding only the columns in names, in that
                 order, with a copy of self's row names """

        rv = DataSet()
        for name in names:
            rv.NewCol(self.GetCol(name), name)
        rv.rnames = self.rnames[:]
        return rv

    def WriteFile(self, File, InOrder=None, *args, **kwargs):
        """ pre: (InOrder == None) || (InOrder is acceptable to self.RowReorder)
           post: File written by DynMatrix.matrix.WriteFile(self, File, *args, **kwargs)
                 (InOrder != None) => rows are written in InOrder
                 the row order of self is unchanged on exit, even if writing fails """

        if InOrder is None:
            DynMatrix.matrix.WriteFile(self, File, *args, **kwargs)
            return

        OldOrder = self.rnames[:]
        self.RowReorder(InOrder)
        try:
            # Call the base class directly: re-entering self.WriteFile would bind
            # the first extra positional argument to InOrder.
            DynMatrix.matrix.WriteFile(self, File, *args, **kwargs)
        finally:
            self.RowReorder(OldOrder)

    def WriteFileAsColPairs(self, File, ref):
        """ pre: File is a file name or an object with a write() method
                 ref is a column name of self
           post: for every column other than ref, File contains a block of the form
                     #<ref> <column name>
                     <ref value><TAB><column value>     (one line per row)
                 followed by a blank line.
                 A file opened here from a name is closed; a file object passed in
                 is left open for the caller. """

        opened_here = isinstance(File, str)
        if opened_here:
            File = open(File, "w")

        try:
            refc = self.GetCol(ref)
            for cname in self.cnames:
                if cname != ref:
                    File.write("#" + ref + " " + cname + "\n")
                    col = self.GetCol(cname)
                    for i in range(len(refc)):
                        File.write(str(refc[i]) + "\t" + str(col[i]) + "\n")
                    File.write("\n")
        finally:
            if opened_here:
                File.close()

    def Update(self, label=None):
        """ pre: self was created with a SrcDic holding a value for every column name
           post: a new row named label (default "Row_<current number of rows>") is
                 appended and filled from self.SrcDic.
                 If filling fails a message is written to stdout and the new row
                 keeps whatever values had been set (zeros otherwise). """

        if label is None:
            label = "Row_" + str(len(self))
        self.NewRow([0] * len(self.cnames), label)
        try:
            for k in self.cnames:
                self[label, k] = self.SrcDic[k]
        except Exception:
            # Deliberately best-effort: report and carry on rather than raise.
            sys.stdout.write(
                "!!! DataSet.Update failed - either not created with source dictionary\n"
                "!!! or source dictionary lacks a key for one of the columns\n"
            )

    def UpdateFromDic(self, dic, name=None):
        """ pre: dic is a dictionary of {column name: value}
           post: a new row is appended (named name if given) holding the values in dic;
                 keys of dic that are not yet column names are added as new columns """

        self.NewRow()
        if name is not None:
            self.rnames[-1] = name
        for k in dic:
            if k not in self.cnames:
                self.NewCol(name=k)
            self[-1, k] = dic[k]

    def _get(self, Name):
        """ return the row self[Name] or, if that lookup raises, the column
            self.GetCol(Name) """

        try:
            return self[Name]
        except Exception:
            return self.GetCol(Name)

    def Sum(self, Name):
        """ pre: len(self[Name]) > 0
            post: self.Sum(Name) == Arithmetic sum of self[Name] """

        return sum(self._get(Name))

    def Mean(self, Name):
        """ pre: len(self[Name]) > 0
            post: self.Mean(Name)    == arithmetic mean of self[Name] """

        return Stats.Mean(self._get(Name))

    def Variance(self, Name):
        """ pre: len(self[Name]) > 1
            post: self.Variance(Name)== variance self[Name] """

        return Stats.Var(self._get(Name))

    def StdDev(self, Name):
        """ pre: len(self[Name]) > 1
            post: self.StdDev(Name) == standard deviation of self[Name] """

        return Stats.StdDev(self._get(Name))

    def StdErr(self, Name):
        """ pre: len(self[Name]) > 1
            post: self.StdErr(Name)  ==  standard error of the mean of self[Name] """

        return self.StdDev(Name) / math.sqrt(len(self._get(Name)))

    def Lower_n_ile(self, Name, n):
        """ pre: len(self[Name]) > 0, 0 < n <= 1
            post: self.Lower_n_ile(Name, n) == value of self[Name] which is the limit of the
            lower nth portion of self[Name] """

        return Stats.Lower_n_ile(self._get(Name), n)

    def Upper_n_ile(self, Name, n):
        """ pre: len(self[Name]) > 0, 0 < n <= 1
            post: self.Upper_n_ile(Name, n) == value of self[Name] which is the limit of the
            upper nth portion of self[Name] """

        return Stats.Upper_n_ile(self._get(Name), n)

    def Median(self, Name):
        """ pre: len(self[Name]) > 0
            post: self.Median(Name) == Median value of self[Name] """

        return Stats.Median(self._get(Name))

    def TTest(self, Name_a, Name_b):
        """ Student's (two tailed) t and p(t) """

        return Stats.TTest(self._get(Name_a), self._get(Name_b))

    def FTest(self, Name_a, Name_b):
        """ F and p(F) """

        return Stats.FTest(self._get(Name_a), self._get(Name_b))

    def Pearsons(self, ref, targs=None):
        """ pre: ref and targs in self.cnames
           post: dictionary of {targ: Pearson's r(ref, targ)}
                 (empty if targs is omitted or empty) """

        rv = {}
        refcol = self.GetCol(ref)
        for t in (targs or []):
            rv[t] = Stats.Pearson_r(refcol, self.GetCol(t))

        return rv

    def _Ranks(self, vals):
        """ pre: vals is a sequence of mutually comparable values
           post: list of 1-based ranks of vals, in the original order of vals;
                 tied values share the mean of the ranks they span """

        order = sorted(range(len(vals)), key=lambda i: vals[i])
        ranks = [0.0] * len(vals)
        start = 0
        while start < len(order):
            # extend [start, stop] over the run of values equal to vals[order[start]]
            stop = start
            while stop + 1 < len(order) and vals[order[stop + 1]] == vals[order[start]]:
                stop += 1
            mean_rank = (start + stop) / 2.0 + 1.0
            for pos in range(start, stop + 1):
                ranks[order[pos]] = mean_rank
            start = stop + 1
        return ranks

    def Spearmans(self, ref, targs=None):
        """ pre: ref and targs in self.cnames
           post: dictionary of {targ: Spearman's rank correlation(ref, targ)}
                 (empty if targs is omitted or empty)

            Spearman's coefficient is Pearson's r applied to the ranks of the data
            (ties given their mean rank), so Stats.Pearson_r is used on ranked columns. """

        rv = {}
        refranks = self._Ranks(self.GetCol(ref))
        for t in (targs or []):
            rv[t] = Stats.Pearson_r(refranks, self._Ranks(self.GetCol(t)))

        return rv

    def Deriv(self, wrt):
        """ pre: wrt is a row or column name of self, with at least 3 items
           post: returns a new DataSet holding, for every other row (column), the
                 derivative with respect to wrt as estimated by Seq.Deriv over
                 successive 3-point windows; the wrt row (column) itself is kept,
                 trimmed of its first and last items, as are the item names.
                 self is unchanged on exit.
                 A name present as both row and column is treated as a row. """

        ## much easier to do with rows, so
        # ("not in self.rnames" stops endless recursion when the name is in both)
        if wrt in self.cnames and wrt not in self.rnames:
            self.Transpose()
            try:
                rv = self.Deriv(wrt)
            finally:
                self.Transpose()        # always restore self's orientation
            rv.Transpose()
            return rv

        rv = DataSet()

        x = self[wrt]
        lenx_mi_1 = len(x) - 1
        for rname in self.rnames:
            if rname != wrt:
                retrow = []
                y = self[rname]
                # interior points only: each estimate uses its two neighbours
                for n in range(1, lenx_mi_1):
                    retrow.append(Seq.Deriv(x[n-1:n+2], y[n-1:n+2]))
            else:
                retrow = x[1:-1]
            rv.NewRow(retrow, rname)
        rv.cnames = self.cnames[1:-1]

        return rv

    #
    ##
    ###  Plotting functions #########################
    ##
    #

    def SetPlotX(self, x):
        """ pre: True
           post:   if self has a column of name or index x, this will be used as x axis
                   for plotting
                 else warning message on terminal """

        if x in self.cnames:
            self.PlotX = x
        elif isinstance(x, int) and 0 <= x < len(self.cnames):
            # column index, as used by the default PlotX of 0
            self.PlotX = x
        else:
            sys.stdout.write("!!  %s not found - ignoring !!\n" % (x,))

    def AddToPlot(self, ys, style="linespoints", AutoPlot=True):
        """ pre:  ys is a string or list of strings
                  style is a valid gnuplot style
            post:   if: present items in ys are added to the current plot
                  else: warning(s) on stderr
                  AutoPlot => plot is updated immediately (applies to all other methods
                  with AutoPlot parameter)
        """

        if not isinstance(ys, (list, tuple)):
            ys = [ys]

        for y in ys:
            if y in self.cnames:
                self.Plotter.AddData(y, [self.GetCol(self.PlotX), self.GetCol(y)], style)
            else:
                sys.stderr.write("! ignoring attempt to plot non-existent value  %s  !\n" % (y,))

        if AutoPlot:
            self.Plot()

    def AddMatchesToPlot(self, substr, style="linespoints", AutoPlot=True):
        """ pre:  substr is a string
                  style is a valid gnuplot style
            post: item names  containing substr are added to the current plot """

        matches = [col for col in self.cnames if substr in col]
        self.AddToPlot(matches, style, AutoPlot)

    def RemoveFromPlot(self, ys, AutoPlot=True):
        """ pre: ys is a string or list of strings
           post: ys is/are not in the current plot """

        if not isinstance(ys, (list, tuple)):
            ys = [ys]

        for y in ys:
            self.Plotter.RemoveData(y)

        if AutoPlot:
            self.Plot()

    def RemoveMatchesFromPlot(self, substr, AutoPlot=True):
        """ pre: substr is a string
           post: items whose name contains substr are removed from the plot """

        matches = [key for key in self.Plotter.keys() if substr in key]
        self.RemoveFromPlot(matches, AutoPlot)

    def GetPlotItems(self):
        """ return a list of names of data currently being plotted """

        return self.Plotter.keys()

    def GetPlotData(self, x, y):
        """ return a Plotter.Plotdata object, corresponding to col x and y,
            useful for combining data from more than one data set in a single plot.
            see the Data.Plotter module

            x == None => the current plot x axis (self.PlotX) is used """

        if x is None:
            x = self.PlotX
        return Plotter.Plotdata([self.GetCol(x), self.GetCol(y)])

    def SavePlot(self, filename):
        """ save the current plot in gnuplot readable format to filename """

        self.Plotter.SavePlot(filename)

    def RemoveAllFromPlot(self):
        """ remove all items from the plot """

        # list(): take a snapshot of the names, as items are removed while looping
        self.RemoveFromPlot(list(self.Plotter.keys()))

    def Plot(self):
        """ update the current plot window or plot in new one if not present """

        x = self.GetCol(self.PlotX)
        for k in self.Plotter.keys():
            # refresh each plotted item from the current contents of self
            self.Plotter[k].UpdateData([x, self.GetCol(k)])

        self.Plotter.Plot()

    def Filter(self, Targ, fun):
        """ pre: Targ is a column or row name of self
                 fun(value) returns true for values to be retained
           post: returns a copy of self without those rows (columns, if Targ is a
                 row name) for which fun(value in Targ) is false.
                 self is unchanged on exit.
                 A name present as both row and column is treated as a column. """

        # ("not in self.cnames" stops endless recursion when the name is in both)
        if Targ in self.rnames and Targ not in self.cnames:
            self.Transpose()
            try:
                rv = self.Filter(Targ, fun)
            finally:
                self.Transpose()        # always restore self's orientation
            rv.Transpose()
            return rv

        rv = self.Copy()
        c = self.cnames.index(Targ)
        for r in self.rnames:
            if not fun(self[r, c]):
                rv.DelRow(r)
        return rv

    def Histogram(self, name, nbins, **kwargs):
        """ pre: name is a column or row name of self
           post: returns Histo.Histo of that column (or row, if name is not a column
                 name) with nbins bins; kwargs are passed on to Histo.Histo """

        if name in self.cnames:
            return Histo.Histo(self.GetCol(name), nbins, **kwargs)
        else:
            return Histo.Histo(self[name], nbins, **kwargs)

    def HistogramAll(self, nbins, **kwargs):
        """ pre: True
           post: returns Histo.Histo of every value in self (all rows pooled)
                 with nbins bins; kwargs are passed on to Histo.Histo """

        data = []
        for r in self.rows:
            data.extend(r)
        return Histo.Histo(data, nbins, **kwargs)

    def HistogramUD(self, nbins, Diag=True, **kwargs):
        """ pre: self is square
           post: returns Histo.Histo of the upper triangle of self with nbins bins;
                 Diag => the leading diagonal is included
                 kwargs are passed on to Histo.Histo """

        data = []
        if Diag:
            for i in range(len(self)):
                data.extend(self[i][i:])
        else:
            for i in range(len(self) - 1):
                data.extend(self[i][i+1:])

        return Histo.Histo(data, nbins, **kwargs)
00421 
00422 
00423 
00424 
00425 
00426 
00427 
00428 

Generated on Tue Sep 4 2012 15:38:01 for ScrumPy by  doxygen 1.7.1