00001 import types,math
00002
00003 from Util import Seq
00004 import Stats, Plotter
00005
00006 class Histo(dict):
00007
00008 def __init__(self,vals, nbins=10,minv=None,maxv=None,emptybins=True,name="Histogram"):
00009
00010 self.vals = vals[:]
00011 self.minv = minv
00012 self.maxv = maxv
00013 self.nbins = nbins
00014 self.emptybins = emptybins
00015 self.name = name
00016 self.style = "boxes"
00017
00018 self.Plotter = Plotter.GPPlotter()
00019 self.Plotter.Options['yrange'] = 'set yra [0:]'
00020 if len(vals)>1:
00021 self.Update()
00022
00023
00024 def __str__(self):
00025 rv = ""
00026 ks = self.keys()
00027 ks.sort()
00028 for k in ks:
00029 rv += " ".join([str(k), str(self[k])])+"\n"
00030 return rv
00031
00032 def __iadd__(self,items):
00033 self.vals += items
00034 return self
00035
00036 def append(self,items):
00037 self.vals.append(items)
00038
00039 def extend(self,items):
00040 self.vals.extend(items)
00041
00042 def Add(self, vals):
00043 self.vals.extend(vals)
00044 if min(vals) < self.minv:
00045 self.minv=min(vals)
00046 if max(vals)> self.maxv:
00047 self.maxv=max(vals)
00048 self.Update()
00049
00050 def AsTuples(self):
00051
00052 ks = self.keys()
00053 ks.sort()
00054 rv = []
00055 for k in ks:
00056 rv.append( (k,self[k]))
00057 return rv
00058
00059 def AsLists(self):
00060 keys = self.keys()
00061 keys.sort()
00062 vals = map(lambda k: self[k], keys)
00063 return keys, vals
00064
00065 def AsList(self):
00066 return self.AsLists()[1]
00067
00068
00069 def Plot(self):
00070
00071 self.Plotter.AddData(self.name, self.AsLists())
00072 minv, maxv = map(str, (self.minv, self.maxv))
00073 self.Plotter.Options['xrange'] = 'set xra ['+minv +':'+maxv+']'
00074 self.Plotter[self.name].Options["Style"] = self.style
00075 self.Plotter.Plot()
00076
00077
00078 def Update(self):
00079
00080 self.vals.sort()
00081
00082 if self.minv == None:
00083 self.minv = self.vals[0]
00084
00085 if self.maxv == None:
00086 self.maxv = self.vals[-1]
00087
00088 minv = self.minv = float(self.minv)
00089 maxv = self.maxv = float(self.maxv)
00090
00091 binsize = (maxv - minv)/(self.nbins)
00092 binsize2 = binsize/2
00093 curbin = minv+ binsize2
00094
00095 for n in self.keys():
00096 del self[n]
00097
00098 for n in range(self.nbins):
00099 curbin += binsize
00100 self[curbin] = 0
00101
00102 bins = sorted(self.keys())
00103 curbin = bins.pop(0)
00104 maxbin = curbin+binsize2
00105
00106 for v in self.vals:
00107 while v>=maxbin:
00108 curbin=bins.pop(0)
00109 maxbin+=binsize
00110
00111 self[curbin]+=1
00112
00113
00114 if self.Plotter.has_key(self.name):
00115 self.Plotter.RemoveData(self.name)
00116
00117
00118
00119
00120
00121
00122 def AutoParam(self):
00123
00124 self.nbins = 3*int(math.ceil(len(self.vals)**0.33))
00125 self.minv = min(self.vals)
00126 self.maxv = max(self.vals)
00127 self.Clear()
00128 self.Update()
00129
00130
00131 def Clear(self):
00132
00133 for k in self.keys()[:]:
00134 del self[k]
00135
00136 self.minv=None
00137 self.maxv=None
00138
00139
00140 def Save(self, file):
00141
00142 if type(file)==types.StringType:
00143 file = open(file,"w")
00144
00145 file.write(str(self))
00146
00147
00148 def Normalise(self):
00149
00150 ks = self.keys()
00151 range = max(ks) - min(ks)
00152 Area=sum(self.values()) *range/len(self)
00153 for k in ks:
00154 self[k] /=Area
00155
00156
00157
00158
00159 def ChiSquare(self, sample=None, ref=None,nbins=None,Plot=True,*args,**kwargs):
00160 """ pre sample = list or None, ref=list or function or None, sample=None or ref=None,nbins>2 """
00161
00162 if nbins==None:
00163 nbins=self.nbins
00164 else:
00165 self.nbins = nbins
00166
00167 self.Clear()
00168
00169 if sample != None:
00170 other = Histo(sample,nbins)
00171 minv = min((min(sample), min(self.vals)))
00172 maxv = max((max(sample), max(self.vals)))
00173 self.minv = other.minv = minv
00174 self.maxv = other.maxv = maxv
00175 self.Update()
00176 if 0 in self.values():
00177 print "0 count(s) in reference, need more reference data or fewer bins"
00178 return
00179
00180 rv = Stats.ChiSquare(other.AsList(),self.AsList(),True)
00181 else:
00182
00183 if type(ref) == types.FunctionType:
00184 other = Histo([])
00185 self.Update()
00186 for k in self.keys():
00187 other[k] = ref(k,*args,**kwargs)
00188 rv = Stats.ChiSquare(self.AsList(),other.AsList(),True)
00189
00190 else:
00191 self.Update()
00192 other = Histo(ref)
00193 rv = other.ChiSquare(self.vals[:],nbins=nbins,Plot=False)
00194
00195 if Plot:
00196 self.Normalise()
00197 other.Normalise()
00198 self.Plotter.AddToPlot(other.AsLists())
00199 self.Plot()
00200
00201 return rv
00202
00203
00204
00205 def ChiSquare(obs, ref, nbins,Norm=True):
00206
00207 maxv = max(obs+ref)
00208 minv = min(obs + ref)
00209 hobs = Histo(obs,nbins,minv=minv,maxv=maxv)
00210 href = Histo(ref,nbins,minv=minv,maxv=maxv)
00211 chisq, p = Stats.ChiSquare(hobs.AsList(),href.AsList(),Norm)
00212 rv = {}
00213 rv["ChiSq"]=chisq
00214 rv["p_ChisSq"] = p
00215 rv["ObsHist"] = hobs
00216 rv["RefHist"] = href
00217 return rv
00218
00219
00220
00221
00222 def BoolList2His(List, binsize):
00223
00224 nbins,rem = divmod(len(List),binsize)
00225
00226 midbin = binsize/2.
00227 rv = {}
00228 for n in range(nbins):
00229 lo = n*binsize
00230 hi = lo+binsize
00231 count = List[lo:hi].count(True)
00232 rv[lo+midbin]=count
00233
00234 if rem >0:
00235 rv[hi+midbin] = List[hi:].count(True)
00236 return rv