-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmax_seq.py
143 lines (116 loc) · 4.63 KB
/
max_seq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# max_seq.py
#
# Copyright 2019 Diserere <diserere@cooler>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Returns max substring containing not more than N different symbols
"""
__version__ = '1.1.3'
def is_matching(char, n, charmap, charmap_len):
"""
Matches given char with keys in dict, update value by n
if found, or add {key: value} if dict length is less
than max length, of returns False.
Args:
str char:
one-char string to match
int n:
current pos in string to save as last occurence
dict charmap:
dict with char as key and n as value
int charmap_len:
max num of elements
"""
# try to match with stack
if char in charmap or len(charmap) < charmap_len:
charmap[char] = n
match = True
else: # nor found neither added: not match
match = False
return match
def save_max_substr(cur_substr, max_substr):
# Use ">" here to save first max substr found instead of last one
if len(cur_substr) >= len(max_substr):
max_substr = cur_substr
return max_substr
def get_max_seq(string, charset_len):
"""
Return longest substring consisting of not more than given number of
different chars found in input string. If more than one string of
equal length is found then the last one is returned.
Args:
str string:
input string
int charset_len:
max number of different chars in sequence, expected to be >=0
"""
# charset_len should be int and positive
if type(charset_len) is not int:
raise TypeError('int charset_len: unexpected type: %s' % type(charset_len) )
elif charset_len < 0:
raise TypeError('charset_len: should not be negative: %s' % str(charset_len) )
# Null charset_len cause exception while pop from null list, see below
if len(string) == 0 or charset_len == 0:
return ""
# start from beginning of string
p = 0
max_substr = ''
cur_substr = ''
charset = dict()
while p <= len(string)-1:
cur_char = string[p]
if not is_matching(cur_char, p, charset, charset_len):
# end of sequence is found; compare and save result
max_substr = save_max_substr(cur_substr, max_substr)
# get key of leftmost (by addr) element from charmap
min_key = min(charset, key=charset.get)
# get new start address from its address
new_start_addr = charset[min_key] - p + len(cur_substr) + 1
# remove leftmost element
charset.pop(min_key)
# add new char to charmap
charset[cur_char] = p
# trunk current ss for new sequence
cur_substr = cur_substr[new_start_addr:]
# add last char to ss, fwd ptr
cur_substr += cur_char
p += 1
# here the ptr is at the end of string, compare results again
max_substr = save_max_substr(cur_substr, max_substr)
return max_substr
def main(args):
import argparse
d_MAX_SEQ_LEN = 2
parser = argparse.ArgumentParser(
description='Python script for print max substring from given string containing not more than '+ str(d_MAX_SEQ_LEN) + ' symbols',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('str', type=str, metavar='STRING', default="", help='String to find a sequence')
parser.add_argument('-n', '--num', type=int, metavar='NUM', default=d_MAX_SEQ_LEN, help='Max. number of different symbols in substring')
aargs = parser.parse_args()
string = aargs.str
max_charset = aargs.num
max_substr = get_max_seq(string, max_charset)
print ("Last substring of max length of " + str(len(max_substr)) + " is:")
print ("[" + max_substr + "]")
if __name__ == '__main__':
import sys
sys.exit(main(sys.argv))