Package Dbase :: Package Pubmed :: Module Records
[hide private]
[frames | no frames]

Source Code for Module Dbase.Pubmed.Records

  1  # $Id: Records.py 486 2008-01-19 14:19:19Z glandrum $ 
  2  # 
  3  # Copyright (C) 2003-2006 Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved  @@ 
  6  # 
  7  from xml.etree import ElementTree 
  8  # check the version of ElementTree.  We need at least version 1.2 
  9  # in order for the XPath-style parsing stuff to work 
 10  import re 
 11  vers = re.split("[a-zA-Z]",ElementTree.VERSION)[0] 
 12  if vers < '1.2': 
 13    raise ImportError,'The PubMed record interface requires a version of ElementTree >= 1.2' 
 14   
 15   
class Record(object):
  """Base class for records that wrap a parsed ElementTree element.

  Subclasses list the attribute names they populate in
  ``_fieldsOfInterest``; every one of those attributes is initialised to
  the empty string before the subclass fills in real values.
  """
  # No fields by default, so that the base class can be instantiated
  # directly instead of failing with AttributeError in __init__.
  _fieldsOfInterest = []

  def __init__(self, element):
    """Store *element* and reset every field of interest to ''.

    Arguments:
      - element: the ElementTree element this record is built from
    """
    for field in self._fieldsOfInterest:
      setattr(self, field, '')
    self._element = element

  def toXML(self):
    """Return the wrapped element serialised as XML.

    The result is whatever ``ElementTree.tostring`` produces with its
    default encoding (a byte string).
    """
    return ElementTree.tostring(self._element)
26
class SummaryRecord(Record):
  """Record populated from the named ``Item`` elements of a summary.

  Every ``Item`` descendant of the element whose ``Name`` attribute is
  listed in ``_fieldsOfInterest`` has its text copied onto the attribute
  of the same name.  ``PubYear`` is derived from ``PubDate``.
  """
  _fieldsOfInterest=['PubMedId','PubDate','Source','Authors',
                     'Title','Volume','Issue','Pages','Lang',
                     'HasAbstract','RecordStatus']

  def __init__(self,element):
    """Build the record from *element*.

    Arguments:
      - element: ElementTree element containing the ``Item`` children
    """
    Record.__init__(self,element)
    # PubYear is derived, not parsed; default it like the other fields so
    # the attribute always exists, even when PubDate is empty.
    self.PubYear = ''
    # Element.getiterator() was removed in Python 3.9; prefer iter() and
    # fall back to getiterator() for ElementTree versions that lack it.
    walk = getattr(element,'iter',None) or element.getiterator
    for item in walk('Item'):
      # .get() tolerates an Item without a Name attribute
      name = item.get('Name')
      if name in self._fieldsOfInterest:
        setattr(self,name,item.text)
    if self.PubDate:
      # the year is taken to be the first space-separated token of PubDate
      self.PubYear = str(self.PubDate).split(' ')[0]
38
def _asText(value):
  """Return *value* as a text (unicode) string, mapping None to ''."""
  if value is None:
    # findtext() yields None for a missing element; converting that
    # directly would give the literal string 'None'.
    return u''
  return type(u'')(value)


def _asNativeStr(value):
  """Return *value* as the interpreter's native ``str`` type."""
  if isinstance(value,str):
    return value
  # only reached on Python 2, where non-ASCII text arrives as unicode
  return value.encode('utf-8')


def _iterDescendants(element,tag=None):
  """Iterate over *element* and its descendants, optionally filtered by tag."""
  # Element.getiterator() was removed in Python 3.9; prefer iter() and
  # fall back to getiterator() for ElementTree versions that lack it.
  walker = getattr(element,'iter',None) or element.getiterator
  return walker(tag)


class JournalArticleRecord(Record):
  """Record for a full journal article.

  The element is expected to contain a ``MedlineCitation`` child with an
  ``Article`` inside it; both are required.  Besides the fields listed in
  ``_fieldsOfInterest`` the constructor sets:

    - authors: list of (last name, fore name, initials) tuples
    - keywords: list of strings built by ParseKeywords()
    - chemicals: list of strings built by ParseChemicals()
  """
  _fieldsOfInterest=['PubMedId','PubYear','Source','Authors',
                     'Title','Volume','Issue','Pages','Lang',
                     'Abstract']

  def __init__(self,element):
    """Parse *element* and populate the record's attributes.

    Arguments:
      - element: ElementTree element for the article
    """
    Record.__init__(self,element)

    cite = self._element.find('MedlineCitation')
    self.PubMedId = cite.findtext('PMID')
    article = cite.find('Article')
    issue = article.find('Journal/JournalIssue')
    if issue is not None:
      self.Volume = issue.findtext('Volume')
      self.Issue = issue.findtext('Issue')
      self.PubYear = issue.findtext('PubDate/Year')
      if not self.PubYear:
        # no explicit Year: use the first token of MedlineDate instead,
        # which leaves '' when that element is missing as well
        txt = issue.findtext('PubDate/MedlineDate','')
        self.PubYear = txt.split(' ')[0]
    self.Title = _asText(article.findtext('ArticleTitle'))
    self.Pages = article.findtext('Pagination/MedlinePgn')
    abstract = article.findtext('Abstract/AbstractText')
    if abstract:
      self.Abstract = _asText(abstract)

    self.authors = []
    shortNames = []
    authorList = article.find('AuthorList')
    # an article without an AuthorList simply has no authors
    if authorList is not None:
      for author in _iterDescendants(authorList,'Author'):
        last = _asText(author.findtext('LastName'))
        first = _asText(author.findtext('ForeName'))
        initials = _asText(author.findtext('Initials'))
        self.authors.append((last,first,initials))
        # "Last Initials", skipping whichever part is missing
        shortName = ' '.join([part for part in (last,initials) if part])
        if shortName:
          shortNames.append(shortName)
    self.Authors=', '.join(shortNames)
    journal = cite.findtext('MedlineJournalInfo/MedlineTA')
    if journal:
      self.Source = _asText(journal)

    self.ParseKeywords()
    self.ParseChemicals()

  def ParseKeywords(self):
    """Fill ``self.keywords`` from MedlineCitation/MeshHeadingList.

    Each entry is the heading's DescriptorName followed by
    ' / <qualifier>' for every QualifierName under that heading.
    """
    self.keywords = []
    headings = self.find('MedlineCitation/MeshHeadingList')
    # compare with None: the truth value of an Element is deprecated
    if headings is None:
      return
    for heading in _iterDescendants(headings,'MeshHeading'):
      kw = _asText(heading.findtext('DescriptorName'))
      for qualifier in _iterDescendants(heading,'QualifierName'):
        kw += ' / %s'%(_asText(qualifier.text))
      self.keywords.append(kw)

  def ParseChemicals(self):
    """Fill ``self.chemicals`` from MedlineCitation/ChemicalList.

    Each entry is 'name <registry number>', or just the name when the
    registry number is '0' or missing.
    """
    self.chemicals = []
    chemicals = self.find('MedlineCitation/ChemicalList')
    # compare with None: the truth value of an Element is deprecated
    if chemicals is None:
      return
    for chemical in _iterDescendants(chemicals,'Chemical'):
      name = _asNativeStr(chemical.findtext('NameOfSubstance',''))
      rn = _asNativeStr(chemical.findtext('RegistryNumber',''))
      if rn and rn != '0':
        self.chemicals.append('%s <%s>'%(name,rn))
      else:
        self.chemicals.append('%s'%(name))

  # --------------------------------------------
  #
  #  We'll expose these ElementTree methods in case
  #  client code wants to pull extra info
  #
  def getiterator(self,key=None):
    """Return a list of the element and its descendants, optionally by tag."""
    # materialise a list, which is what Element.getiterator() returned
    return list(_iterDescendants(self._element,key))

  def find(self,key):
    """Return the first subelement matching *key*, or None."""
    return self._element.find(key)

  def findtext(self,key):
    """Return the text of the first subelement matching *key*, or None."""
    return self._element.findtext(key)

  def findall(self,key):
    """Return all subelements matching *key*."""
    return self._element.findall(key)
117
class LinkRecord(Record):
  """Record for a single link element.

  Sets ``PubMedId`` to the element's text and ``HasNeighbor`` to 1 or 0
  according to the element's ``HasNeighbor`` attribute.
  """
  _fieldsOfInterest=[]

  def __init__(self,element):
    """Build the record from *element*.

    Arguments:
      - element: ElementTree element describing the link
    """
    Record.__init__(self,element)
    node = self._element
    self.PubMedId = node.text
    # 1 only when the attribute is exactly 'Y'; absent counts as 'N'
    self.HasNeighbor = int(node.get('HasNeighbor','N') == 'Y')
128