bmSearch  0.0.4
Public Member Functions | Public Attributes | Static Public Attributes | List of all members
mailLogParser.mailLogParser Class Reference

Public Member Functions

def __init__ (self)
 
def getData (self)
 
def getFileContent (self)
 
def getStreamContent (self)
 
def parseContent (self)
 
def setParam (self, kwargs)
 

Public Attributes

 action
 ANTIVIRUS. More...
 
 cDb
 
 data
 
 method
 
 queueidFromMessageid
 
 surveyQueuid
 

Static Public Attributes

 addonInfo
 
 bodyDateLine
 
 bodyFromLine
 
 bodySubjectLine
 
 cleanupLine
 
 compressionCase
 
 content = f.readlines()
 
 cyrusDelivered
 
 dates
 
 dbStore
 
 dbStoreMaxDays
 
 dbtemp
 
 f = open(self.maillog , 'r' )
 
 filenameLine
 
 ft = tarfile.open(self.maillog, self.compressionCase[ftype])
 
 ftype
 
 maillog
 
 member0 = ft.getnames()[0]
 
 p = subprocess.Popen(["zcat", self.maillog], stdout = subprocess.PIPE)
 
 pat2
 
 qmgrLine
 
 smtpdLine
 
 smtpLineDSN2
 
 smtpLineDSN4
 
 smtpLineDSN5
 
 smtpRemoved
 
 t = fileMIMEType()
 
 theseFileTypes
 
 timeInterval
 
 updateData
 
 updateSurveyQueuid
 

Detailed Description

mailLogParser :
        Has to open/stream mail.log file,
        parse each line to feed a dict with pertinent values
        and try (if kwargs set) to publish them in ELK platform

Constructor & Destructor Documentation

def mailLogParser.mailLogParser.__init__ (   self)
__init__ : 
        constructor accept kwargs
        
        **kwargs
                file   : path to file to parse. It can be plain text, gzip or bzip2 compressed. By default what is configured in /etc/bmSearch/config.py
                
                method : how to read the file, 2 possibilities
                        - "file" : read the file and exit after reach end file. By default.
                        - "stream" : connect to the file and stream data. No exit while the program is running
                        
                action : tell what to do, 2 cases
                        - "storeToELK"  : to store in Elasticsearch instance configured in /etc/bmSearch/config.py
                        - "updateToELK" : to update in Elasticsearch instance data (deliveredRecip)
def __init__(self):
    """Initialize the parser's per-run containers.

    Note: this constructor takes no arguments; runtime parameters
    (file, method, action, ...) are applied afterwards via setParam().

    Attributes created:
        data: main dict, one entry per postfix queue id.
        surveyQueuid: list of queue ids currently being tracked.
        queueidFromMessageid: message-id -> queue id map, needed because
            antivirus log lines only carry the message-id.
    """
    # Main data dict
    self.data = defaultdict(dict)
    # Queue ids under survey for the current run
    self.surveyQueuid = []
    # Only for Antivirus case
    self.queueidFromMessageid = defaultdict(dict)

Member Function Documentation

def mailLogParser.mailLogParser.getData (   self)
getData :
        Retrieve data dict values.
        Only for debugging: requires commenting out the "del self.data[queueid]" line in the parseContent() method.
def getData(self):
    """Expose the internal message dict (debugging aid).

    Entries are normally deleted as each message block is flushed, so
    this only returns useful content when the ``del self.data[queueid]``
    line in the parseContent() machinery is commented out.
    """
    snapshot = self.data
    return snapshot
def mailLogParser.mailLogParser.getFileContent (   self)
getFileContent :
        Read a whole file. Its name comes from config or the bmSearch (main) kwargs "file" (see bmSearch).
        Set var "content" with line(s)
187  def getFileContent(self):
188  """
189  getFileContent :
190  Read a whole file. Its name is from config or bmSearch (main) kwargs "file" (see bmSearch).
191  Set var "content" with line(s)
192 
193  """
194 
def mailLogParser.mailLogParser.getStreamContent (   self)
getStreamContent :
        Connect to a file and stream its content (as a tail). Its name comes from config or the bmSearch (main) kwargs "file" (see bmSearch).
        Set var "content" with line(s)
        Break only when main breaks
def getStreamContent(self):
    """Follow self.maillog like ``tail -f``, yielding lines as they appear.

    Opens the file once, then loops forever: whenever readline() comes
    back empty, sleep one second and rewind to the remembered offset so
    a partially-written line is re-read whole on the next pass.  The
    generator only stops when the caller stops consuming it.
    """
    try:
        handle = open(self.maillog)
        pause = 1.0

        while True:
            offset = handle.tell()
            chunk = handle.readline()

            if chunk:
                yield chunk
            else:
                time.sleep(pause)
                handle.seek(offset)

    except Exception as exc:
        # NOTE(review): self.logger is not assigned anywhere visible in
        # this class -- presumably injected elsewhere; confirm before
        # relying on it.
        self.logger.catch('mailLogParser::getStreamContent')
def mailLogParser.mailLogParser.parseContent (   self)
parseContent :
        read line from getFileContent (=> method = 'file') or getStreamContent (=> method ='stream') and extract data
        If  bmSearch (main) kwargs "action" is set and has value :
                - "storeToELK", try to post data to an Elasticsearch instance
                - "updateToELK", try to update an Elasticsearch instance data (deliveredRecip)
Open SQLITE DB
def parseContent(self):
    """Drive a full parsing run.

    Pull lines from getFileContent() (method == 'file') or
    getStreamContent() (method == 'stream') and feed each one to
    parseLine().  If the bmSearch (main) kwarg "action" is set:
        - "storeToELK"  : post data to the configured Elasticsearch
        - "updateToELK" : update existing Elasticsearch data (deliveredRecip)
    """
    # Open the SQLITE DB matching the read mode: a throwaway db for a
    # one-shot file parse, the persistent store when streaming.
    if self.method == "file":
        db = self.dbtemp
    elif self.method == "stream":
        db = self.dbStore

    # NOTE(review): an unrecognized self.method leaves `db` unbound and
    # raises NameError on the next line (same as the original code).
    self.cDb = sqlQuery(db)
    self.cDb.initDb()
    self.cDb.updateTrigger(self.dbStoreMaxDays)

    if self.method == "file":
        source = self.getFileContent()
    else:
        source = self.getStreamContent()

    for entry in source:
        self.parseLine(entry)
def decodev2(self, bodySubject):
    """Decode an RFC 2047 encoded-word email subject into readable text.

    Args:
        bodySubject: raw ``Subject:`` header value, possibly containing
            one or more ``=?charset?Q|B?payload?=`` encoded words
            captured by the precompiled ``self.pat2`` regex (6 groups:
            raw, extra1, encoding, method, string, extra).

    Returns:
        The decoded, concatenated subject, or ``bodySubject`` unchanged
        when ``self.pat2`` finds no encoded word.
    """
    data = self.pat2.findall(bodySubject)
    if not data:
        return bodySubject

    line = []
    for (raw, extra1, encoding, method, string, extra) in data:
        # Plain text preceding the encoded word.
        extra1 = extra1.replace('\r', '').replace('\n', '').strip()
        if len(extra1) > 0:
            line.append(extra1)

        kind = method.lower()
        if kind == 'q':
            # BUGFIX: quopri.decodestring() returns bytes on Python 3,
            # so the RFC 2047 underscore-for-space substitution must use
            # bytes arguments (str args raised TypeError here).
            decoded = quopri.decodestring(string).replace(b"_", b" ").strip()
            line.append(decoded.decode(encoding, errors='ignore'))
        elif kind == 'b':
            line.append(base64.b64decode(string).decode(encoding, errors='ignore'))
        else:
            # Unknown encoding method: keep the raw payload instead of
            # crashing on str.decode (str has no decode() in Python 3).
            line.append(string)

        # Plain text following the encoded word.
        extra = extra.replace('\r', '').replace('\n', '').strip()
        if len(extra) > 0:
            line.append(extra)

    return "".join(line)
def parseLine(self, line):
    """Parse one mail.log line and accumulate per-message data.

    Each postfix/amavis/klms log line type is matched against a
    precompiled regex attribute; matches update self.data (keyed by
    postfix queue id) and, for already-seen message-ids, self.updateData.
    If the bmSearch (main) kwarg "action" is set and has value:
        - "storeToELK"  : try to post data to an Elasticsearch instance
        - "updateToELK" : try to update Elasticsearch data (deliveredRecip)
    """
    thisLine = line.rstrip()

    # Connection: a "postfix/smtpd" line opens a new message block.
    smtpdLineEntries = self.smtpdLine.match(thisLine)
    if smtpdLineEntries is not None:
        # try to update some datas (antivirus, cyrus message delivery status)

        # Default the date to today; self.dates maps the syslog date
        # token to an ISO date when known.
        date = time.strftime('%Y-%m-%d',time.localtime())
        try:
            date = self.dates[smtpdLineEntries.group('date')]

        except:
            # Unknown date token: keep today's date.
            pass

        hour = smtpdLineEntries.group('hour')
        # Epoch milliseconds, for the Elasticsearch @timestamp field.
        timestp = int(datetime.strptime(date + " " + hour, '%Y-%m-%d %H:%M:%S').strftime("%s")) * 1000
        queueid = smtpdLineEntries.group('queueid')
        server = smtpdLineEntries.group('server').split(",")[0].replace("["," : ").replace("]","")

        self.data[queueid] = defaultdict(dict)
        self.data[queueid]['@timestamp'] = timestp
        self.data[queueid]['date'] = date
        self.data[queueid]['hour'] = hour
        self.data[queueid]['server'] = server

        # Elastic Entries
        self.data[queueid]['source'] = self.maillog
        self.data[queueid]['type'] = 'message'

        self.data[queueid]['beat'] = defaultdict(dict)
        self.data[queueid]['beat']['hostname'] = config.HOSTNAME
        self.data[queueid]['beat']['name'] = 'bmsearch'
        self.data[queueid]['beat']['version'] = config.VERSION

        # Placeholders filled in by later qmgr / header-check lines.
        self.data[queueid]['from'] = self.data[queueid]['fromDomain'] = self.data[queueid]['nrcpt'] = self.data[queueid]['size'] = ""
        self.data[queueid]['bodyDate'] = self.data[queueid]['bodyFrom'] = self.data[queueid]['bodyFromDomain'] = self.data[queueid]['bodySubject'] = ""

        self.data[queueid]['antivirus'] = 'CLEAN : not analyzed !'

        self.data[queueid]['filename'] = []
        self.data[queueid]['recip'] = []
        self.data[queueid]['deferredRecip'] = []
        self.data[queueid]['bouncedRecip'] = []
        self.data[queueid]['recipType'] = defaultdict(dict)
        self.data[queueid]['recipType']['internal'] = 0
        self.data[queueid]['recipType']['external'] = 0

        # Pre-seed a zero counter per surveyed attachment type.
        self.data[queueid]['fileType'] = defaultdict(dict)
        for thisType in self.theseFileTypes:
            self.data[queueid]['fileType'][thisType] = 0

    # Message-id: a "postfix/cleanup" line ties the queue id to a message-id.
    cleanupLineEntries = self.cleanupLine.match(thisLine)
    if cleanupLineEntries is not None:
        queueid = cleanupLineEntries.group('queueid')
        messageid = cleanupLineEntries.group('messageid').replace("<","").replace(">","")
        self.data[queueid]['msgid'] = messageid

        # A hack because in case of delivery notification (from=<>), it
        # could not have a "postfix/smtpd" line
        if 'recip' in self.data[queueid].keys():
            pass
        else:
            self.data[queueid]['recip'] = []
            self.data[queueid]['recipType'] = defaultdict(dict)
            self.data[queueid]['recipType']['internal'] = 0
            self.data[queueid]['recipType']['external'] = 0

        if self.cDb.verifyIfMessageidExists(messageid):
            # Because we have already parsed this message lines
            del self.data[queueid]

            # We are now in update mode, to only catch DSN case and do an
            # update of the existing entry
            self.updateData[queueid] = defaultdict(dict)
            self.updateData[queueid]['deferredRecip'] = []
            self.updateData[queueid]['bouncedRecip'] = []
            self.updateData[queueid]['msgid'] = messageid
            self.updateSurveyQueuid.append(queueid)

        else:
            self.surveyQueuid.append(queueid)
            self.queueidFromMessageid[messageid] = queueid

            if '@timestamp' in self.data[queueid].keys():
                timestamp = self.data[queueid]['@timestamp']/1000
            else:
                timestamp = int(time.time())

            # We insert messageid and timestamp in sqlite for later, in
            # case we have 2 message blocs
            tupl=(messageid,timestamp)
            self.cDb.addEntry(tupl)

    ## ANTIVIRUS

    # Kaspersky Labs.  NOTE(review): klmsLine (and amavisLine below) are
    # used without `self.` -- presumably module-level patterns imported
    # alongside this class; confirm in the full source file.
    klmsLineEntries = klmsLine.match(thisLine.replace("\"",""))
    if klmsLineEntries is not None:
        messageid = klmsLineEntries.group('messageid')
        status = klmsLineEntries.group('status')
        status += ", antivirus-status:" + klmsLineEntries.group('avstatus')
        status += ", antiphishing-status:" + klmsLineEntries.group('apstatus')
        status += ", antispam-status:" + klmsLineEntries.group('asstatus')

        if messageid in self.queueidFromMessageid.keys():
            queueid = self.queueidFromMessageid[messageid]
            self.data[queueid]['antivirus'] = status
            # Each message-id is consumed once.
            try:
                del self.queueidFromMessageid[messageid]
            except KeyError:
                pass

    # Amavis-New
    amavisLineEntries = amavisLine.match(thisLine)
    if amavisLineEntries is not None:
        messageid = amavisLineEntries.group('messageid')
        status = amavisLineEntries.group('status')

        if messageid in self.queueidFromMessageid.keys():
            queueid = self.queueidFromMessageid[messageid]
            self.data[queueid]['antivirus'] = status
            try:
                del self.queueidFromMessageid[messageid]
            except KeyError:
                pass

    # Filename if controlled
    filenameLineEntries = self.filenameLine.match(thisLine)
    if filenameLineEntries is not None:
        queueid = filenameLineEntries.group('queueid')
        filename = filenameLineEntries.group('filename')

        if queueid in self.surveyQueuid:

            # Extension-based bucket; "jpg" is normalized to "jpeg".
            fileType = filename.lower().split('.')[-1].replace("jpg","jpeg")

            if filename not in self.data[queueid]['filename']:
                self.data[queueid]['filename'].append(filename)
                try:
                    self.data[queueid]['fileType'][fileType] += 1
                except:
                    # First attachment of a type not pre-seeded from
                    # self.theseFileTypes.
                    self.data[queueid]['fileType'][fileType] = 0
                    self.data[queueid]['fileType'][fileType] += 1

    ## Specific body message checks (see /etc/postfix/BMSEARCH_header_checks)

    # For these header controls we have to set the self.data[queueid] in
    # every case, because lines are written by cleanup, before the message-id

    # bodyDate if controlled
    bodyDateLineEntries = self.bodyDateLine.match(thisLine)
    if bodyDateLineEntries is not None:
        queueid = bodyDateLineEntries.group('queueid')
        bodyDate = bodyDateLineEntries.group('bodyDate')

        self.data[queueid]['bodyDate'] = bodyDate

    # bodyFrom if controlled
    bodyFromLineEntries = self.bodyFromLine.match(thisLine)
    if bodyFromLineEntries is not None:
        queueid = bodyFromLineEntries.group('queueid')
        bodyFrom = bodyFromLineEntries.group('bodyFrom')

        # Strip any "Display Name <addr>" wrapper down to the address.
        self.data[queueid]['bodyFrom'] = bodyFrom.lower().split('<')[-1].replace(">","")
        self.data[queueid]['bodyFromDomain'] = bodyFrom.lower().split('@')[-1].replace(">","")

    # bodySubject if controlled
    bodySubjectLineEntries = self.bodySubjectLine.match(thisLine)
    if bodySubjectLineEntries is not None:
        queueid = bodySubjectLineEntries.group('queueid')
        bodySubject = bodySubjectLineEntries.group('bodySubject')

        # RFC 2047 decode (see decodev2).
        self.data[queueid]['bodySubject'] = self.decodev2(bodySubject)

    # Return to general info.
    # Sender, size, nrcpt
    qmgrLineEntries = self.qmgrLine.match(thisLine)
    if qmgrLineEntries is not None:
        queueid = qmgrLineEntries.group('queueid')
        thisFrom = qmgrLineEntries.group('from')
        size = qmgrLineEntries.group('size')
        nrcpt = qmgrLineEntries.group('nrcpt')

        # Empty sender (from=<>) means a delivery notification.
        if len(thisFrom):
            pass
        else:
            thisFrom='notification@none'

        if queueid in self.surveyQueuid:
            self.data[queueid]['size'] = size
            self.data[queueid]['nrcpt'] = nrcpt
            self.data[queueid]['from'] = thisFrom.lower()
            self.data[queueid]['fromDomain'] = thisFrom.lower().split('@')[-1].replace(">","")

    # When delivery is OK
    smtpLineDSN2Entries = self.smtpLineDSN2.match(thisLine)
    if smtpLineDSN2Entries is not None:
        queueid = smtpLineDSN2Entries.group('queueid')
        recip = smtpLineDSN2Entries.group('recip')
        orig = smtpLineDSN2Entries.group('orig')

        recipDomain = recip.lower().split('@')[-1]

        if queueid in self.surveyQueuid:
            if queueid in self.data.keys():
                if 'recipType' in self.data[queueid].keys():
                    pass
                else:
                    self.data[queueid]['recipType'] = {'external': 0 , 'internal': 0 }

                # internal == recipient shares the sender's domain.
                if self.data[queueid]['fromDomain'] == recipDomain :
                    self.data[queueid]['recipType']['internal'] += 1
                else:
                    self.data[queueid]['recipType']['external'] += 1

                self.data[queueid]['recip'].append(recip.lower())

    # When delivery is deferred (ie mailbox is overquota)
    smtpLineDSN4Entries = self.smtpLineDSN4.match(thisLine)
    if smtpLineDSN4Entries is not None:
        queueid = smtpLineDSN4Entries.group('queueid')
        deferredRecip = smtpLineDSN4Entries.group('recip')
        orig = smtpLineDSN4Entries.group('orig')

        recipDomain = deferredRecip.lower().split('@')[-1]

        # 1st time : create
        if queueid in self.surveyQueuid:
            if queueid in self.data.keys():

                if 'recipType' in self.data[queueid].keys():
                    pass
                else:
                    self.data[queueid]['recipType'] = {'external': 0 , 'internal': 0 }

                if self.data[queueid]['fromDomain'] == recipDomain :
                    self.data[queueid]['recipType']['internal'] += 1
                else:
                    self.data[queueid]['recipType']['external'] += 1
                self.data[queueid]['deferredRecip'].append(deferredRecip.lower())

        # 2nd time : update
        if queueid in self.updateSurveyQueuid:
            self.updateData[queueid]['deferredRecip'].append(deferredRecip.lower())

    # When delivery is bounced (ie the mailbox does not exist or the msg is older than 5 days)
    smtpLineDSN5Entries = self.smtpLineDSN5.match(thisLine)
    if smtpLineDSN5Entries is not None:
        queueid = smtpLineDSN5Entries.group('queueid')
        bouncedRecip = smtpLineDSN5Entries.group('recip')
        orig = smtpLineDSN5Entries.group('orig')

        recipDomain = bouncedRecip.lower().split('@')[-1]

        # 1st time : create
        if queueid in self.surveyQueuid:
            if queueid in self.data.keys():
                if 'recipType' in self.data[queueid].keys():
                    pass
                else:
                    self.data[queueid]['recipType'] = {'external': 0 , 'internal': 0 }

                if self.data[queueid]['fromDomain'] == recipDomain :
                    self.data[queueid]['recipType']['internal'] += 1
                else:
                    self.data[queueid]['recipType']['external'] += 1

                self.data[queueid]['bouncedRecip'].append(bouncedRecip.lower())

        # 2nd time : update
        if queueid in self.updateSurveyQueuid:
            self.updateData[queueid]['bouncedRecip'].append(bouncedRecip.lower())

    # Because we end the mail bloc log, we can now put it in ELK
    smtpRemovedEntries = self.smtpRemoved.match(thisLine)
    if smtpRemovedEntries is not None:
        queueid = smtpRemovedEntries.group('queueid')

        # 1st time : create
        if queueid in self.surveyQueuid:
            if self.action == "storeToELK":
                requestToELK().sendData(self.data[queueid])

            # In any case we delete all about this message to preserve memory
            try:
                del self.data[queueid]
            except KeyError:
                pass

        # 2nd time : update
        if queueid in self.updateSurveyQueuid:
            if self.action == "storeToELK":

                if len(self.updateData[queueid]['bouncedRecip']) or len(self.updateData[queueid]['deferredRecip']):
                    u = requestToELK()
                    # Give Elasticsearch time to index the original
                    # document before updating it.
                    time.sleep(self.timeInterval)
                    u.retrieveBmsearchId(self.updateData[queueid]['msgid'])
                    u.replaceDSNRecip(self.updateData[queueid])

            # In any case we delete all about this message to preserve memory
            try:

                del self.updateData[queueid]
            except KeyError:
                pass

    # Delete some bad entries (postfix noise words captured where a queue
    # id is expected).
    # NOTE(review): `queueid` is only bound when one of the patterns above
    # matched; on a line matching none of them this would raise NameError.
    # The original indentation is lost in this extraction -- confirm this
    # loop's nesting level against the real source file.
    for badKey in ('table hash','warning','statistics'):
        if badKey in self.data[queueid]:
            try:
                del self.data[queueid]
            except KeyError:
                pass
Definition: sqlQuery.py:1
Definition: requestToELK.py:1
def mailLogParser.mailLogParser.setParam (   self,
  kwargs 
)
setParam:
        overload params with these in **kwargs.
        Params available are :
            file   
            action
            method
            dbStore
            dbtemp
            dbStoreMaxDays
            timeInterval
        For details have a look in /etc/bmSearch/config.py
114  def setParam(self, **kwargs):
115  """
116  setParam:
117  overload params with these in **kwargs.
118  Params available are :
119  file
120  action
121  method
122  dbStore
123  dbtemp
124  dbStoreMaxDays
125  timeInterval
126  For details have a look in /etc/bmSearch/config.py
127  """
128  self.maillog = config.maillog
129  if "file" in kwargs.keys():
130  self.maillog = kwargs['file']
131 
132  self.action = ""
133  if "action" in kwargs.keys():
134  self.action = kwargs['action']
135 
136  self.method = "file"
137  if "method" in kwargs.keys():
138  self.method = kwargs['method']
139 
140  self.dbStore = config.dbStore

Member Data Documentation

mailLogParser.mailLogParser.action

ANTIVIRUS.

Specific body message checks (see /etc/postfix/BMSEARCH_header_checks)

mailLogParser.mailLogParser.addonInfo
static
mailLogParser.mailLogParser.bodyDateLine
static
mailLogParser.mailLogParser.bodyFromLine
static
mailLogParser.mailLogParser.bodySubjectLine
static
mailLogParser.mailLogParser.cDb
mailLogParser.mailLogParser.cleanupLine
static
mailLogParser.mailLogParser.compressionCase
static
mailLogParser.mailLogParser.content = f.readlines()
static
mailLogParser.mailLogParser.cyrusDelivered
static
mailLogParser.mailLogParser.data
mailLogParser.mailLogParser.dates
static
mailLogParser.mailLogParser.dbStore
static
mailLogParser.mailLogParser.dbStoreMaxDays
static
mailLogParser.mailLogParser.dbtemp
static
mailLogParser.mailLogParser.f = open(self.maillog , 'r' )
static
mailLogParser.mailLogParser.filenameLine
static
mailLogParser.mailLogParser.ft = tarfile.open(self.maillog, self.compressionCase[ftype])
static
mailLogParser.mailLogParser.ftype
static
mailLogParser.mailLogParser.maillog
static
mailLogParser.mailLogParser.member0 = ft.getnames()[0]
static
mailLogParser.mailLogParser.method
mailLogParser.mailLogParser.p = subprocess.Popen(["zcat", self.maillog], stdout = subprocess.PIPE)
static
mailLogParser.mailLogParser.pat2
static
mailLogParser.mailLogParser.qmgrLine
static
mailLogParser.mailLogParser.queueidFromMessageid
mailLogParser.mailLogParser.smtpdLine
static
mailLogParser.mailLogParser.smtpLineDSN2
static
mailLogParser.mailLogParser.smtpLineDSN4
static
mailLogParser.mailLogParser.smtpLineDSN5
static
mailLogParser.mailLogParser.smtpRemoved
static
mailLogParser.mailLogParser.surveyQueuid
mailLogParser.mailLogParser.t = fileMIMEType()
static
mailLogParser.mailLogParser.theseFileTypes
static
mailLogParser.mailLogParser.timeInterval
static
mailLogParser.mailLogParser.updateData
static
mailLogParser.mailLogParser.updateSurveyQueuid
static

The documentation for this class was generated from the following file: