• Main Page
  • Namespaces
  • Classes
  • Files
  • File List

/home/mark/model/software/ScrumPy/ScrumPy/Data/Histo.py

00001 import types,math
00002 
00003 from Util import Seq
00004 import Stats,  Plotter
00005 
00006 class Histo(dict):
00007 
00008     def __init__(self,vals, nbins=10,minv=None,maxv=None,emptybins=True,name="Histogram"):
00009 
00010         self.vals  = vals[:]
00011         self.minv  = minv
00012         self.maxv  = maxv
00013         self.nbins = nbins
00014         self.emptybins = emptybins
00015         self.name = name
00016         self.style = "boxes"
00017 
00018         self.Plotter = Plotter.GPPlotter()
00019         self.Plotter.Options['yrange'] = 'set yra [0:]'
00020         if len(vals)>1:
00021             self.Update() # might be starting empty
00022 
00023 
00024     def __str__(self):
00025         rv = ""
00026         ks = self.keys()
00027         ks.sort()
00028         for k in ks:
00029             rv += " ".join([str(k), str(self[k])])+"\n"
00030         return rv
00031 
00032     def __iadd__(self,items):
00033         self.vals += items
00034         return self
00035 
00036     def append(self,items):
00037         self.vals.append(items)
00038 
00039     def extend(self,items):
00040         self.vals.extend(items)
00041 
00042     def Add(self, vals):
00043         self.vals.extend(vals)
00044         if min(vals) < self.minv:
00045             self.minv=min(vals)
00046         if max(vals)> self.maxv:
00047             self.maxv=max(vals)
00048         self.Update()
00049 
00050     def AsTuples(self):
00051 
00052         ks = self.keys()
00053         ks.sort()
00054         rv = []
00055         for k in ks:
00056             rv.append( (k,self[k]))
00057         return rv
00058 
00059     def AsLists(self):
00060         keys = self.keys()
00061         keys.sort()
00062         vals = map(lambda k: self[k], keys)
00063         return keys, vals
00064 
00065     def AsList(self):
00066         return self.AsLists()[1]
00067 
00068 
00069     def Plot(self):
00070 
00071         self.Plotter.AddData(self.name, self.AsLists())
00072         minv, maxv = map(str, (self.minv, self.maxv))
00073         self.Plotter.Options['xrange'] = 'set xra ['+minv +':'+maxv+']'
00074         self.Plotter[self.name].Options["Style"] = self.style
00075         self.Plotter.Plot()
00076 
00077 
00078     def Update(self):
00079 
00080         self.vals.sort()
00081 
00082         if self.minv == None:
00083             self.minv = self.vals[0]
00084 
00085         if self.maxv == None:
00086             self.maxv = self.vals[-1]
00087 
00088         minv = self.minv = float(self.minv)
00089         maxv = self.maxv = float(self.maxv)
00090 
00091         binsize  = (maxv - minv)/(self.nbins)
00092         binsize2 = binsize/2
00093         curbin = minv+ binsize2
00094 
00095         for n in self.keys():
00096             del self[n]
00097 
00098         for n in range(self.nbins):
00099             curbin += binsize
00100             self[curbin] = 0
00101 
00102         bins = sorted(self.keys())
00103         curbin = bins.pop(0)
00104         maxbin = curbin+binsize2
00105 
00106         for v in self.vals:
00107             while v>=maxbin:
00108                 curbin=bins.pop(0)
00109                 maxbin+=binsize
00110 
00111             self[curbin]+=1
00112 
00113 
00114         if self.Plotter.has_key(self.name):
00115             self.Plotter.RemoveData(self.name)
00116 
00117 
00118 
00119 
00120 
00121 
00122     def AutoParam(self):
00123 
00124         self.nbins = 3*int(math.ceil(len(self.vals)**0.33))
00125         self.minv = min(self.vals)
00126         self.maxv = max(self.vals)
00127         self.Clear()
00128         self.Update()
00129 
00130 
00131     def Clear(self):
00132 
00133         for k in self.keys()[:]:
00134             del self[k]
00135 
00136         self.minv=None
00137         self.maxv=None
00138 
00139 
00140     def Save(self, file):
00141 
00142         if type(file)==types.StringType:
00143             file = open(file,"w")
00144 
00145         file.write(str(self))
00146 
00147 
00148     def Normalise(self):
00149 
00150         ks = self.keys()
00151         range = max(ks) - min(ks)
00152         Area=sum(self.values())  *range/len(self)
00153         for k in ks:
00154             self[k] /=Area
00155 
00156 
00157 
00158 
00159     def ChiSquare(self, sample=None, ref=None,nbins=None,Plot=True,*args,**kwargs):
00160         """ pre sample = list or None, ref=list or function or None, sample=None or ref=None,nbins>2 """
00161 
00162         if nbins==None:
00163             nbins=self.nbins
00164         else:
00165             self.nbins = nbins
00166 
00167         self.Clear()
00168 
00169         if sample != None: # we are the reference
00170             other = Histo(sample,nbins)
00171             minv = min((min(sample), min(self.vals)))
00172             maxv = max((max(sample), max(self.vals)))
00173             self.minv = other.minv = minv
00174             self.maxv = other.maxv = maxv
00175             self.Update()
00176             if 0 in self.values():
00177                 print "0 count(s) in reference, need more reference data or fewer bins"
00178                 return
00179 
00180             rv = Stats.ChiSquare(other.AsList(),self.AsList(),True)
00181         else: # we are the sample
00182 
00183             if type(ref) == types.FunctionType:  # reference is a funtion
00184                 other = Histo([])
00185                 self.Update()
00186                 for k in self.keys():
00187                     other[k] = ref(k,*args,**kwargs)
00188                 rv = Stats.ChiSquare(self.AsList(),other.AsList(),True)
00189 
00190             else:
00191                 self.Update()
00192                 other = Histo(ref)  # reference is data
00193                 rv =  other.ChiSquare(self.vals[:],nbins=nbins,Plot=False)
00194 
00195         if Plot:
00196             self.Normalise()
00197             other.Normalise()
00198             self.Plotter.AddToPlot(other.AsLists())
00199             self.Plot()
00200 
00201         return rv
00202 
00203 
00204 
00205 def ChiSquare(obs, ref, nbins,Norm=True):
00206 
00207     maxv = max(obs+ref)
00208     minv = min(obs +  ref)
00209     hobs = Histo(obs,nbins,minv=minv,maxv=maxv)
00210     href = Histo(ref,nbins,minv=minv,maxv=maxv)
00211     chisq, p = Stats.ChiSquare(hobs.AsList(),href.AsList(),Norm)
00212     rv = {}
00213     rv["ChiSq"]=chisq
00214     rv["p_ChisSq"] = p
00215     rv["ObsHist"] = hobs
00216     rv["RefHist"] = href
00217     return rv
00218 
00219 
00220 
00221 
00222 def BoolList2His(List, binsize):
00223 
00224     nbins,rem = divmod(len(List),binsize)
00225 
00226     midbin = binsize/2.
00227     rv = {}
00228     for n in range(nbins):
00229         lo = n*binsize
00230         hi = lo+binsize
00231         count = List[lo:hi].count(True)
00232         rv[lo+midbin]=count
00233 
00234     if rem >0:
00235         rv[hi+midbin] = List[hi:].count(True)
00236     return rv

Generated on Tue Sep 4 2012 15:38:01 for ScrumPy by  doxygen 1.7.1