Try this code (Python 3.x):
# Tag names that never get a closing tag appended.  This is only a subset of
# the HTML void elements; extend it if the input uses others (e.g. "input").
notags = ('img', 'br', 'hr')


def _is_void(name):
    """Return True if *name* is listed in ``notags`` (ASCII case-insensitive)."""
    return name in notags or name.lower() in notags


def _record_tag(raw, open_tags):
    """Update the *open_tags* stack for one tag.

    *raw* is the text found between ``<`` and ``>`` (without the brackets).
    Opening tags are pushed, closing tags pop the most recent entry, and
    void / self-closing tags leave the stack untouched.
    """
    parts = raw.split()
    if not parts:
        # "<>" or "< >": no tag name, nothing to track.
        return
    name = parts[0]
    if name.startswith('/'):
        name = name.replace('/', '')
        # Guard against a stray closer with nothing open, which would
        # otherwise raise IndexError on an empty stack.
        if not _is_void(name) and open_tags:
            open_tags.pop()
    elif not raw.rstrip().endswith('/') and not _is_void(name):
        # Check the *whole* tag for a trailing "/", so that self-closing tags
        # carrying attributes (``<input type="x" />``) are not left open.
        open_tags.append(name)


def substring2(html, size):
    """Truncate *html* to at most *size* visible characters, keeping it balanced.

    Characters inside ``<...>`` are copied through but not counted.  Once
    *size* text characters have been emitted the scan stops and a closing tag
    is appended for every element still open, innermost first.

    Args:
        html: Markup to truncate.
        size: Maximum number of text (non-tag) characters to keep.

    Returns:
        *html* unchanged when its total length is already <= *size*; an empty
        string when *size* <= 0; otherwise the truncated, re-closed markup.

    Note:
        This is a character scanner, not an HTML parser: a ``>`` inside an
        attribute value, comments, and character entities are not treated
        specially.
    """
    if size <= 0:
        return ''
    if len(html) <= size:
        return html

    pieces = []        # output characters, joined once at the end
    open_tags = []     # stack of element names awaiting a closing tag
    tag_chars = []     # characters of the tag currently being read
    in_tag = False     # must start False: input may begin with plain text
    count = 0          # number of text characters emitted so far

    for c in html:
        pieces.append(c)
        if c == '<':
            in_tag = True
        elif c == '>' and in_tag:
            in_tag = False
            _record_tag(''.join(tag_chars), open_tags)
            tag_chars = []
        elif in_tag:
            tag_chars.append(c)
        else:
            # Plain text, including a literal ">" that is not closing a tag.
            count += 1
            if count >= size:
                break

    pieces.extend('</{0}>'.format(name) for name in reversed(open_tags))
    return ''.join(pieces)


s = '<div class="main">html <code>substring</code> function written by <span>imxylz</span>, using <a href="http://www.python.org">python</a> language</div>'
print(s)
for size in (30, 40, 55):
    print(substring2(s, size))
Output
<div class="main">html <code>substring</code> function written by <span>imxylz</span>, using <a href="http://www.python.org">python</a> language</div>
<div class="main">html <code>substring</code> function writte</div>
<div class="main">html <code>substring</code> function written by <span>imxyl</span></div>
<div class="main">html <code>substring</code> function written by <span>imxylz</span>, using <a href="http://www.python.org">python</a></div>
more
See the GitHub code.
Another question.
source share