• Main Page
  • Namespaces
  • Classes
  • Files
  • File List

/home/mark/model/software/ScrumPy/ScrumPy/Bioinf/PyoCyc/Base.py

00001 import os, types, exceptions
00002 import  Tags
00003 
00004 
00005 DefaultPath = "/usr/local/share/bio/db/biocyc/"
00006 
def Pass(*foo, **bar):
    """Accept any positional and keyword arguments, do nothing, return None.

    Usable wherever a callback is required but no action is wanted.
    """
    return None
00009 
def Field2Str(u, v="", sep=Tags.Delim):
    """Format one field as a single newline-terminated line.

    u   - the field's tag (UID), must already be a string
    v   - the field's value, converted with str()
    sep - separator placed between u and v
    """
    pieces = (u, sep, str(v), "\n")
    return "".join(pieces)
00013 
00014 
class Record:  # base class for BioCyc Records
    """A single BioCyc record: a unique id plus tagged lists of values.

    Field values live in self.Attributes, keyed by tag.  The entry for
    Tags.UID holds the id itself; entries created through NewTag() hold a
    list of values.  Any attribute that is not found on the record is looked
    up on self.Attributes, so dictionary methods (keys, has_key, items ...)
    can be called on the record directly.

    NOTE: the class is deliberately still declared without a base class.
    This module targets Python 2, where that makes it a classic class whose
    implicit special-method lookups also pass through __getattr__; deriving
    from object would change that for existing callers and sub-classes.
    """

    ChildFields = []
    ParentFields = []
    RecordClass = "Base" # these three should be defined by sub-classes

    def __init__(self, id, Org=None):
        """id is the biocyc unique id.

        Org, when given, is the container used to resolve parent/child UIDs
        (GenChildren/GenParents) and to register associations (NewTag).
        """
        self.Attributes = {}
        self.Attributes[Tags.UID] = id
        self.UID = id
        self.Org = Org

    def __getitem__(self, k):
        return self.Attributes[k]

    def __setitem__(self, k, i):
        self.Attributes[k] = i

    def __getattr__(self, a):
        """Delegate attributes not found on the record to self.Attributes."""
        if a == "Attributes":
            # Only reached when Attributes itself is missing (an instance
            # built without __init__); delegating would recurse without end.
            raise AttributeError(a)
        return getattr(self.Attributes, a)

    def __len__(self):
        return len(self.Attributes)

    def __str__(self):
        """The record in flat-file form: the UID line first, then one line
        for every value of every other field."""
        keys = list(self.keys())
        keys.remove(Tags.UID)
        parts = [Field2Str(Tags.UID, self[Tags.UID])]
        for k in keys:
            for r in self[k]:
                parts.append(Field2Str(k, r))
        return "".join(parts)

    def __repr__(self):
        # str() so that a record whose UID is None still has a valid repr
        return str(self[Tags.UID])

    def __cmp__(self, other):
        """Order records by UID (Python 2 comparison protocol)."""
        if self.UID == other.UID:
            return 0
        if self.UID < other.UID:
            return -1
        return 1

    def write(self, f):
        """ pre: f is an object with a write() method, e.g. open(FileName, "w")
           post: contents of self, followed by the end-of-record marker,
                 written to f """

        f.write(str(self) + Tags.RecEnd + "\n")

    def ValStr2AssocKeys(self, ValStr):
        """ convert string val into a list of keys for the association dict.
            Subclasses can overload this to fine tune the keys that get into
            the assoc dic.

            Punctuation and the literal substrings SUP and SUB are replaced
            by blanks; of the remaining words those longer than two
            characters are returned in upper case, except words that start
            and end with a digit (assumed to be numbers). """

        StripChars = ("(", ")", ",", "'", '"', "\t", "=", "<", ">", "/", "SUP", "SUB")
        for c in StripChars:
            ValStr = ValStr.replace(c, " ")

        return [Val.upper() for Val in ValStr.split()
                if len(Val) > 2
                and not (Val[0].isdigit() and Val[-1].isdigit())]

    def NewTag(self, tag, val):
        """Start field tag with value val and make it the current field.

        If an Org was supplied and val is not None, the association keys
        derived from val are registered with the Org.
        """
        self.CurTag = tag
        try:
            self[tag].append(val)
        except (KeyError, AttributeError):
            # KeyError: first value seen for this tag.
            # AttributeError: the stored value is not a list; it is replaced,
            # as the original catch-all handler did.
            self[tag] = [val]
        if val is not None and self.Org is not None:
            self.Org.AddAssoc(self.ValStr2AssocKeys(val), self)

    def ContTag(self, val):    # continue the current tag - needed in multiline fields
        self[self.CurTag].append(val)

    def Finished(self):
        """invoked when the end-of-record is read, sub-classes can overload this as needed """
        pass

    def __GenRelatives(self, Fields):
        """Return the records referenced by the UIDs stored under each tag in Fields.

        A UID unknown to self.Org gets an NRRecord placeholder stored in
        self.Org.Missing before it is looked up.
        NOTE(review): this relies on self.Org[uid] also finding entries of
        self.Org.Missing - confirm against the Org implementation.
        """
        rels = []
        for fld in Fields:
            if self.has_key(fld):
                for uid in self[fld]:
                    if not self.Org.has_key(uid):
                        self.Org.Missing[uid] = NRRecord(uid)
                    rels.append(self.Org[uid])
        return rels

    def GenChildren(self):
        """(Re)build self.Children from the fields named in ChildFields."""
        # assigned only once complete, so a failure cannot leave a
        # half-built list behind for GetChildren to return later
        self.Children = self.__GenRelatives(self.ChildFields)

    # GetChildren/GetParents are called through the instance by the traversal
    # helpers below, so a sub-class that overloads them changes what
    # TravChildren/TravParents visit.
    def GetChildren(self):
        """A copy of the list of child records, generated on first use."""
        try:
            return self.Children[:]
        except AttributeError:      # not generated yet
            self.GenChildren()
            return self.Children[:]

    def TravChildren(self):
        """All records reachable through GetChildren, depth first, each at
        most once and never including self."""
        return self.__travc({})

    def __travc(self, Seen):
        return self.__trav("GetChildren", Seen)

    def __trav(self, GetRels, Seen):
        """Depth-first walk shared by the child and parent traversals.

        GetRels is the name of the method supplying a record's relatives;
        Seen maps the UIDs already visited and is updated in place.
        """
        rv = []
        Seen[self.UID] = 1
        for rel in getattr(self, GetRels)():
            if rel.UID not in Seen:     # prevent cyclic recursion
                rv.append(rel)
                rv.extend(rel.__trav(GetRels, Seen))
        return rv

#EG10443
    def GenParents(self):
        """(Re)build self.Parents from the fields named in ParentFields."""
        self.Parents = self.__GenRelatives(self.ParentFields)

    def GetParents(self):
        """A copy of the list of parent records, generated on first use."""
        try:
            return self.Parents[:]
        except AttributeError:      # not generated yet
            self.GenParents()
            return self.Parents[:]

    def TravParents(self):
        """All records reachable through GetParents, depth first, each at
        most once and never including self."""
        return self.__travp({})

    def __travp(self, Seen):
        return self.__trav("GetParents", Seen)

# EG10864
    def MultiTrav(self, GoingUp=True, bounces=0):
        """Traverse parents (GoingUp) or children, then, bounces times over,
        continue from every record found in the opposite direction.

        The results are concatenated, so a record may occur more than once.
        """
        if GoingUp:
            rv = self.TravParents()
        else:
            rv = self.TravChildren()

        if bounces > 0:
            for rec in rv[:]:       # snapshot: rv grows inside the loop
                if hasattr(rec, "MultiTrav"):
                    rv.extend(rec.MultiTrav(not GoingUp, bounces-1))
        return rv

    def Traverse(self, GetRels="GetParents", **kwargs):  # kwargs ignored at present
        """ pre: GetRels = ["GetParents" | "GetChildren"]
            post: returns a traversed tree in list form [Parent[Child]] in direction of GetRels"""

        rv = []
        for r in getattr(self, GetRels)():
            rv.append(r)
            try:
                more = r.Traverse(GetRels)
                if len(more) > 0:
                    rv.append(more)
            except Exception:
                # best effort: a relative that cannot be traversed is listed
                # without a sub-tree (KeyboardInterrupt/SystemExit propagate)
                pass

        return rv

    def GetReactions(self):
        " get any reactions that are descendents of self "

        reacs = []
        pending = []    # children to descend into after this level is scanned

        for c in self.GetChildren():
            if hasattr(c, "RecordClass"):
                if c.RecordClass == "Reaction":
                    reacs.append(c)
                elif c.RecordClass != self.RecordClass:
                    pending.append(c)

        for n in pending:
            reacs += n.GetReactions()
        return reacs
00243 
00244 
class NRRecord(Record):
    """ A record used to indicate expected, but missing (Not Reported) data """

    RecordClass = Tags.NR

    def __init__(self, *args, **kwargs):
        """Build an ordinary Record, then set its comment and types fields
        to the Not Reported marker."""
        Record.__init__(self, *args, **kwargs)

        for NRTag in (Tags.Comment, Tags.Types):
            self.NewTag(NRTag, Tags.NR)
00256 
00257 
00258 
00259 
00260 
class DB:   # base class for BioCyc databases
    """The records of one BioCyc flat file, held in a dictionary keyed by UID.

    Records are kept in self.Records and the file's "#" lines in
    self.Comments.  Any attribute not found on the DB is looked up on
    self.Records, so dictionary methods (keys, values, has_key ...) can be
    called on the DB directly.

    NOTE: deliberately still declared without a base class - see the
    delegation in __getattr__; under Python 2 this is a classic class and
    e.g. "del db[k]" is served by the Records dictionary through it.
    """

    def __init__(self,
                 path,                            #  directory containing file
                 file,                              #  file in biocyc dat format
                 RecClass=Record,       #  what kind of record we contain (Base.Record or sub-class thereof)
                 RecRep=Pass,              #  invoke for each new record - optional - progress indicator etc.
                 **kwargs):
        """Read file and build one RecClass instance per record.

        file is either a file name relative to path or an already open
        file-like object (anything with a readlines() method), in which case
        path is ignored and the caller remains responsible for closing it.
        kwargs are passed on to every RecClass constructor; kwargs["Org"],
        if present, is also stored as self.Org.

        If the file cannot be opened a message is printed and the DB is
        left empty.
        """
        self.Records = {}
        self.Comments = []
        self.Org = None
        self.RecClass = RecClass
        self.Imported = False       # defined even if loading fails below
        if "Org" in kwargs:
            self.Org = kwargs["Org"]

        if hasattr(file, "readlines"):
            lines = file.readlines()
        else:
            FileName = path + os.sep + file
            try:
                handle = open(FileName)
            except Exception:
                # best effort, as before: report and carry on with an empty db
                print(" ".join(("couldn't open ", FileName, " this db will be empty")))
                return
            try:
                lines = handle.readlines()
            finally:
                handle.close()      # we opened it, so we close it

        self.__Load(lines, RecClass, RecRep, kwargs)

        if len(self.Comments) > 0:
            self.Imported = self.Comments[0].rstrip() == Tags.Import
            if self.Imported:
                # imported dbs list their reactions directly in each record
                self.GetReactions = self.__iGetReactions

    def __Load(self, lines, RecClass, RecRep, kwargs):
        """Parse lines (newline-terminated strings) into self.Records and self.Comments.

        self.CurRec is the record currently being filled; it is set when a
        UID line is read.
        """
        for line in lines:
            if not line.strip():
                continue                      # blank line - carries no field
            if line[0] == "#":            # comments on a per db basis
                self.Comments.append(line)
            elif line[0:2] == "//":    # record separator: tell the current record it is complete
                self.CurRec.Finished()
            elif line[0] == "/":        # continuation of a previously started field
                self.CurRec.ContTag(line[1:].rstrip()) # add to current record removing leading "/" and trailing ws
            else:
                tagval = line.rstrip().split(" - ", 1)
                tag = tagval[0]
                if len(tagval) == 2:
                    val = tagval[1]
                else:
                    val = None              # tag without a value
                if tag == Tags.UID:         # a UID line starts a new record
                    self.CurRec = RecClass(val, **kwargs)
                    self.Records[val] = self.CurRec
                    RecRep()                  # report the creation of a new record, if anyone's interested
                else:
                    RecClass.NewTag(self.CurRec, tag, val)

    def __getitem__(self, k):
        return self.Records[k]

    def __getattr__(self, a):
        """Delegate attributes not found on the DB to self.Records."""
        if a == "Records":
            # Only reached when Records itself is missing (an instance built
            # without __init__); delegating would recurse without end.
            raise AttributeError(a)
        return getattr(self.Records, a)

    def __len__(self):
        return len(self.Records)

    def write(self, f):
        """ pre: f= open(FileName, "w")
           post: self written to file such that DB(FileName) is equivalent to self """

        for c in self.Comments:
            f.write(c)

        for r in self.values():
            self.RecClass.write(r, f)

    def Prune(self, fun, **kwargs):
        """ pre: bool fun(record, **kwargs)
           post: fun(record, **kwargs) => record not in self' """

        for k in list(self.keys()):     # snapshot: entries are deleted while looping
            if fun(self[k], **kwargs):
                del self[k]

    def GetReactions(self, uid):
        """The reactions descending from record uid, or a one-element list
        holding a "Not found" message if uid is not in the db."""
        if self.has_key(uid):
            return self[uid].GetReactions()
        return [uid + " Not found"]

    def __iGetReactions(self, uid):
        """GetReactions for imported dbs: the contents of the record's
        Tags.Reac field, [] if it has none."""
        if not self.has_key(uid):
            return [uid + " Not found"]
        rec = self[uid]
        if rec.has_key(Tags.Reac):
            return rec[Tags.Reac]
        return []

    def Duplicates(self, field):
        """dictionary of records with identical values in field, field values are keys in the return dictionary
           records not containing field are assumed to have the value None.
           Only the first value of field is compared; values held by a single record are left out."""

        groups = {}
        for rec in self.values():
            if rec.has_key(field):
                fk = rec[field][0]
            else:
                fk = None
            groups.setdefault(fk, []).append(rec)

        return dict((fk, recs) for fk, recs in groups.items() if len(recs) > 1)
00384 
00385 
00386 """
00387     def ExSearch(self, targ):
00388 
00389 
00390         rv = []
00391         for item in self.items():
00392             hit = 0
00393             for field in item[1].items():
00394                 for fi in field[1]:
00395                     if  fi.find(targ) != -1:
00396                         rv.append([fi, field[0], item[0]])
00397                         hit = 1
00398                         break
00399                 if hit:
00400                     hit = 0
00401                     break
00402         return rv
00403 """
00404 
00405 
00406 

Generated on Tue Sep 4 2012 15:38:01 for ScrumPy by  doxygen 1.7.1