Created
September 4, 2022 01:18
-
-
Save synth/715fd15222395f0c6aae9ec65d69df05 to your computer and use it in GitHub Desktop.
Ruby port of truncate.js (from huang47/nodejs-html-truncate)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Ported from a node script that seemed to do truncation well
# https://github.com/huang47/nodejs-html-truncate/blob/master/lib/truncate.js
#
# Truncates an HTML fragment to a maximum number of *text* characters. Markup
# does not count towards the limit, and every tag still open at the cut point
# is closed again so the result stays balanced.
#
#   Trunc.trunc('<p>hello world</p>', 5) # => "<p>hello...</p>"
#
# Recognised options (String or Symbol keys; a nil value means "use default"):
#   ellipsis         - text appended when visible text was dropped
#                      (default '...'; pass false to disable).
#   truncateLastWord - true (default) cuts exactly at max_length; false looks
#                      for a word boundary within +slop+ characters of it.
#   slop             - search distance for truncateLastWord: false
#                      (default 10, but never more than max_length).
#   keepImageTag     - true keeps <img> tags; by default they are stripped.
#   debug            - true prints trace messages to stdout.
class Trunc
  EMPTY_OBJECT = {}.freeze
  EMPTY_STRING = ''.freeze
  DEFAULT_TRUNCATE_SYMBOL = '...'.freeze
  DEFAULT_SLOP = 10
  EXCLUDE_TAGS = %w[img br].freeze # non-closed tags
  KEY_VALUE_REGEX = '([\\w|-]+\\s*=\\s*"[^"]*"\\s*)*'.freeze
  IS_CLOSE_REGEX = '\\s*\\/?\\s*'.freeze
  CLOSE_REGEX = '\\s*\\/\\s*'.freeze
  SELF_CLOSE_REGEX = Regexp.new('<\\/?\\w+\\s*' + KEY_VALUE_REGEX + CLOSE_REGEX + '>')
  HTML_TAG_REGEX = Regexp.new('<\\/?\\w+\\s*' + KEY_VALUE_REGEX + IS_CLOSE_REGEX + '>')
  URL_REGEX = Regexp.new('(((ftp|https?):\/\/)[\-\w@:%_\+.~#?,&\/\/=]+)|((mailto:)?[_.\w\-]+@([\w][\w\-]+\.)+[a-zA-Z]{2,3})') # Simple regexp
  IMAGE_TAG_REGEX = Regexp.new('<img\\s*' + KEY_VALUE_REGEX + IS_CLOSE_REGEX + '>')
  # Ruby has no JavaScript-style 'g' flag; callers step through matches with
  # Regexp#match(string, offset) instead.
  WORD_BREAK_REGEX = /\W+/

  # Convenience wrapper: builds an instance and truncates in one call.
  #
  # @param str [String, nil] HTML to truncate
  # @param max_length [Integer] maximum number of text characters to keep
  # @param options [Hash] see the class comment
  # @return [String] the truncated HTML
  def self.trunc(str, max_length, options = {})
    new(str, max_length, options).trunc
  end

  # The input string (nil input is stored as '').
  attr_reader :str

  # @param string [String, nil] HTML to truncate; nil is treated as ''
  # @param max_length [Integer] maximum number of text characters to keep;
  #   negative values are treated as 0
  # @param options [Hash, nil] see the class comment
  def initialize(string, max_length, options = {})
    @str = string.to_s
    @string = @str
    @max_length = [max_length.to_i, 0].max
    @options = options || EMPTY_OBJECT

    ellipsis = option(:ellipsis)
    truncate_last_word = option(:truncateLastWord)
    slop = option(:slop)

    @ellipsis = ellipsis.nil? ? DEFAULT_TRUNCATE_SYMBOL : ellipsis
    @truncate_last_word = truncate_last_word.nil? ? true : truncate_last_word
    @slop = slop.nil? ? [DEFAULT_SLOP, @max_length].min : slop
    @keep_image_tag = option(:keepImageTag)
    @debug = option(:debug)

    @items = [] # stack of currently open tag names
    @total = 0  # text characters emitted so far
  end

  # Runs the truncation. All working state is reset first, so calling this
  # repeatedly on one instance returns the same result every time.
  #
  # @return [String] the truncated HTML
  def trunc
    remaining = @string
    content = ''.dup
    @items = []
    @total = 0

    loop do
      tag_match = HTML_TAG_REGEX.match(remaining)

      unless tag_match
        # No markup left. A URL that starts before the cut-off is emitted
        # whole, even if that overshoots max_length.
        while @total < @max_length
          url = URL_REGEX.match(remaining)
          break unless url && url.begin(0) < @max_length - @total

          consumed = url.end(0)
          content << remaining[0, consumed]
          @total += consumed
          remaining = remaining[consumed..-1].to_s
        end

        cut = @total < @max_length ? _getEndPosition(remaining) : 0
        content << remaining[0, cut]
        remaining = remaining[cut..-1].to_s
        break
      end

      tag_text = tag_match[0]
      tag_at = tag_match.begin(0)

      if @total + tag_at > @max_length
        # The text in front of this tag exceeds the limit: cut inside it and
        # stop. Tags that are still open get closed after the loop.
        cut = _getEndPosition(remaining, tag_at)
        content << remaining[0, cut]
        remaining = remaining[cut..-1].to_s
        break
      end

      @total += tag_at
      content << remaining[0, tag_at]

      if tag_text[1] == '/'
        log 'Popping items'
        @items.pop
      elsif !self_closing?(tag_text)
        name = _getTag(tag_text)
        # Void tags never get a closing tag, so keeping them on the stack
        # would make a later closing tag pop the wrong entry.
        unless EXCLUDE_TAGS.include?(name)
          log "Pushing item: #{name}"
          @items.push(name)
        end
      end

      content << tag_text
      remaining = remaining[tag_match.end(0)..-1].to_s
    end

    content << @ellipsis.to_s if @ellipsis && visible_text?(remaining)
    content << _dumpCloseTag(@items)
    content = _removeImageTag(content) unless @keep_image_tag
    content
  end

  # Strips every <img> tag from +string+.
  #
  # @param string [String]
  # @return [String] a copy of +string+ without image tags
  def _removeImageTag(string)
    string.gsub(IMAGE_TAG_REGEX, '')
  end

  # Builds the closing tags for +tags+, innermost (last pushed) first.
  # Names listed in EXCLUDE_TAGS are skipped.
  #
  # @param tags [Array<String>] open tag names, outermost first
  # @return [String] concatenated closing tags
  def _dumpCloseTag(tags)
    log "DumpCloseTag: #{tags}"
    html = tags.reverse.reject { |tag| EXCLUDE_TAGS.include?(tag) }
               .map { |tag| "</#{tag}>" }
               .join
    log "DumpCloseTagReturn: #{html}"
    html
  end

  # Extracts the tag name from an opening tag such as '<a href="x">'.
  #
  # @param string [String] a single opening tag
  # @return [String] the tag name
  # @raise [RuntimeError] when +string+ does not start with '<name' followed
  #   by whitespace, '/' or '>'
  def _getTag(string)
    log "getTag: #{string}"
    name = string[/\A<(\w+)(?=[\s\/>])/, 1]
    raise 'HTML tag is not well-formed : ' + string if name.nil?

    log "getTagReturn: #{name}"
    name
  end

  # Works out how many characters of +string+ may still be emitted.
  #
  # With truncateLastWord enabled this is simply the remaining budget.
  # Otherwise a window of +slop+ characters either side of the hard cut is
  # searched for runs of non-word characters: the first run at or after the
  # hard cut wins, else the last run before it, else the hard cut itself.
  # A single whitespace character directly before the result is dropped.
  #
  # @param string [String] the text still to be processed
  # @param tail_pos [Integer, nil] index of the next tag in +string+; the
  #   search window (and therefore the result) never extends past it
  # @return [Integer] number of leading characters of +string+ to keep
  def _getEndPosition(string, tail_pos = nil)
    default_pos = @max_length - @total
    return default_pos if @truncate_last_word

    short = default_pos < @slop
    slop_pos = short ? default_pos : @slop - 1
    start_slice = short ? 0 : default_pos - @slop
    end_slice = default_pos + @slop
    end_slice = tail_pos if tail_pos && tail_pos < end_slice
    window = string[start_slice...end_slice].to_s

    position = default_pos
    offset = 0
    while (word_break = WORD_BREAK_REGEX.match(window, offset))
      at = word_break.begin(0)
      position = default_pos + (at - slop_pos)
      # A break at or after the hard cut ends the search.
      break if at >= slop_pos
      # So does a break at the very start when there is almost no budget.
      break if at.zero? && default_pos <= 1

      offset = word_break.end(0)
    end

    # Guard position > 0: string[-1] would read the last character.
    last_char = position > 0 ? string[position - 1] : nil
    position -= 1 if last_char && last_char =~ /\s/
    position
  end

  # Prints +msg+ to stdout when the debug option is set.
  def log(msg)
    puts msg if @debug
  end

  private

  # Reads an option by Symbol key, falling back to the String key.
  def option(name)
    value = @options[name]
    value = @options[name.to_s] if value.nil?
    value
  end

  # True when the whole tag, not just a fragment of it, is self-closing.
  def self_closing?(tag_text)
    match = SELF_CLOSE_REGEX.match(tag_text)
    !match.nil? && match[0] == tag_text
  end

  # True when +fragment+ contains anything besides tags and whitespace.
  def visible_text?(fragment)
    !fragment.gsub(HTML_TAG_REGEX, '').strip.empty?
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment