bistaumanga/dtw_pattern.py

## dtw_pattern.py
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 11 11:05:05 2014

@author: logpoint
"""

import numpy as np


def align_DTW(seq, ref_seq, window = 5):
    # create the cost metric
    d = lambda x, y : not x == y
    # convert to numpy- array
    seq, ref_seq = np.array(seq), np.array(ref_seq)
    M, N = len(seq), len(ref_seq)
    cost = float('inf') * np.ones((M, N))

    # initialize the first row and column
    cost[0, 0] = d(seq[0], ref_seq[0])
    for i in range(1, M):
        cost[i, 0] = cost[i-1, 0] + d(seq[i], ref_seq[0])

    for j in range(1, N):
        cost[0, j] = cost[0, j-1] + d(seq[0], ref_seq[j])
    # fill in the rest of the matrix
    for i in range(1, M):
        for j in range(max(1, i - window), min(N, i + window)):
            choices = cost[i - 1, j - 1], cost[i, j-1], cost[i-1, j]
            is_different = d(seq[i], ref_seq[j])
            cost[i, j] = min(choices) + d(seq[i], ref_seq[j]) + is_different

    # find the optimal path
    n, m = N - 1, M - 1
#    path = []
    path_cost = []
    alignment = []

    while (m, n) != (0, 0):
#        path.append((seq[m], ref_seq[n]))
        if not d(seq[m], ref_seq[n]):
            alignment.append(ref_seq[n])
        else:
            alignment.append('*')
        path_cost.append(cost[m, n])
        m, n = min((m - 1, n), (m, n - 1), (m - 1, n - 1), key = lambda x: cost[x[0], x[1]])


    if not d(seq[0], ref_seq[0]):
        alignment.append(ref_seq[0])
    else:
        alignment.append('*')
#    path.append((seq[0],ref_seq[0]))
    path_cost.append(0)
    temp = " ".join(alignment[::-1])
    import re
    return re.sub(r"((\s)*\*(\s)*){2,}"," * ", temp)


def align_simple(seq, ref_seq):
    pattern = []
    for word1, word2 in zip(ref_seq, seq):
        try:
            assert word1 == word2
            pattern.append(word1)
        except AssertionError:
            pattern.append("*")
    return " ".join(pattern)


if __name__ == "__main__":
    text = ["user umanga logged in from 173.21.11.33 with mozilla browser",
            "user puneet khanal logged in from localhost with chrome browser",
            "user basanta logged out from 110.44.114.70",
            "user ajay logged out from remote"]
#    print align_DTW(text[0].split(), text[1].split())
    pattern = {}
    for line in iter(text):
        if not len(pattern):
            pattern[1] = line
        else:
            log_tokens = line.split()
            pattern_tokens = pattern[1].split()
            pattern[1] = align_DTW(log_tokens, pattern_tokens)
        print line, '||', pattern[1]

    print pattern[1]
	# -- coding: utf-8 --
	"""
	Created on Tue Mar 11 11:05:05 2014

	@author: logpoint
	"""

	import numpy as np


	def align_DTW(seq, ref_seq, window = 5):
	# create the cost metric
	d = lambda x, y : not x == y
	# convert to numpy- array
	seq, ref_seq = np.array(seq), np.array(ref_seq)
	M, N = len(seq), len(ref_seq)
	cost = float('inf') * np.ones((M, N))

	# initialize the first row and column
	cost[0, 0] = d(seq[0], ref_seq[0])
	for i in range(1, M):
	cost[i, 0] = cost[i-1, 0] + d(seq[i], ref_seq[0])

	for j in range(1, N):
	cost[0, j] = cost[0, j-1] + d(seq[0], ref_seq[j])
	# fill in the rest of the matrix
	for i in range(1, M):
	for j in range(max(1, i - window), min(N, i + window)):
	choices = cost[i - 1, j - 1], cost[i, j-1], cost[i-1, j]
	is_different = d(seq[i], ref_seq[j])
	cost[i, j] = min(choices) + d(seq[i], ref_seq[j]) + is_different

	# find the optimal path
	n, m = N - 1, M - 1
	# path = []
	path_cost = []
	alignment = []

	while (m, n) != (0, 0):
	# path.append((seq[m], ref_seq[n]))
	if not d(seq[m], ref_seq[n]):
	alignment.append(ref_seq[n])
	else:
	alignment.append('*')
	path_cost.append(cost[m, n])
	m, n = min((m - 1, n), (m, n - 1), (m - 1, n - 1), key = lambda x: cost[x[0], x[1]])


	if not d(seq[0], ref_seq[0]):
	alignment.append(ref_seq[0])
	else:
	alignment.append('*')
	# path.append((seq[0],ref_seq[0]))
	path_cost.append(0)
	temp = " ".join(alignment[::-1])
	import re
	return re.sub(r"((\s)\(\s)){2,}"," ", temp)



	def align_simple(seq, ref_seq):
	pattern = []
	for word1, word2 in zip(ref_seq, seq):
	try:
	assert word1 == word2
	pattern.append(word1)
	except AssertionError:
	pattern.append("*")
	return " ".join(pattern)


	if __name__ == "__main__":
	text = ["user umanga logged in from 173.21.11.33 with mozilla browser",
	"user puneet khanal logged in from localhost with chrome browser",
	"user basanta logged out from 110.44.114.70",
	"user ajay logged out from remote"]
	# print align_DTW(text[0].split(), text[1].split())
	pattern = {}
	for line in iter(text):
	if not len(pattern):
	pattern[1] = line
	else:
	log_tokens = line.split()
	pattern_tokens = pattern[1].split()
	pattern[1] = align_DTW(log_tokens, pattern_tokens)
	print line, '\|\|', pattern[1]

	print pattern[1]