How can I write a function that cuts a string containing HTML tags down to roughly N characters without breaking any of the HTML tags?
如何编写一个函数,可以将带有HTML标记的字符串剪切为N长度的字符串,而不会在执行此操作时破坏HTML标记。
The returned string doesn't need to be exactly N characters long: the cut may fall just before or just after a tag that straddles the N-character boundary.
返回的字符串不需要长度为N个字符。它可以在N长字符串边缘的标签之前或之后剪切它。
Visit <a href="www.htz.hr">Croatia</a> this summer.
CutIt(9)
should return
CutIt(9)应该返回
Visit
or
Visit <a href="www.htz.hr">Croatia</a>
7 个解决方案
#1
/**
 * Trim an HTML string to roughly `limit` visible characters without cutting
 * through a tag and without keeping elements that are opened after the cut.
 *
 * The markup is split into alternating tag / text pieces. Text pieces are
 * counted (a run of spaces counts as one character) until the limit is
 * reached; the piece that crosses the limit is shortened and `suffix` is
 * appended to it. Everything after that point is dropped, except closing tags
 * that belong to elements opened before the cut.
 *
 * @param {string} html - Markup to shorten. Line breaks in it are removed from the result.
 * @param {Object} [options]
 * @param {number} [options.limit=100] - Character budget for the visible text.
 *     Falsy values (including 0) fall back to 100.
 * @param {boolean} [options.preserveTags=true] - When false, every tag is removed.
 * @param {boolean} [options.wordBreak=false] - When true the cut may fall inside
 *     a word; otherwise it is moved forward to the end of that word.
 * @param {string} [options.suffix='...'] - Text appended at the cut. An empty
 *     string falls back to '...'.
 * @param {string} [options.moreLink=''] - When set, a link with this href is
 *     appended after the suffix. The value is inserted verbatim (not escaped).
 * @returns {{html: string, more: boolean}} The shortened markup and a flag
 *     telling whether the limit was reached.
 */
function trimHtml(html, options) {
    options = options || {};

    var limit = options.limit || 100,
        preserveTags = (typeof options.preserveTags !== 'undefined') ? options.preserveTags : true,
        wordBreak = (typeof options.wordBreak !== 'undefined') ? options.wordBreak : false,
        suffix = options.suffix || '...',
        moreLink = options.moreLink || '';

    // Put every tag on its own line so that each array entry is either a
    // complete tag or a run of text.
    var arr = html.replace(/</g, "\n<")
        .replace(/>/g, ">\n")
        .replace(/\n\n/g, "\n")
        .replace(/^\n/g, "")
        .replace(/\n$/g, "")
        .split("\n");

    // rowCut is declared here; assigning it undeclared leaked a global and
    // threw a ReferenceError in strict mode / ES modules.
    var sum = 0,
        row, rowCut, cut, add,
        tagMatch,
        tagName,
        tagStack = [],
        more = false;

    for (var i = 0; i < arr.length; i++) {
        row = arr[i];
        // count multiple spaces as one character
        rowCut = row.replace(/[ ]+/g, ' ');

        if (!row.length) {
            continue;
        }

        if (row[0] !== "<") {
            if (sum >= limit) {
                // Text after the cut is dropped.
                row = "";
            } else if ((sum + rowCut.length) >= limit) {
                cut = limit - sum;

                if (row[cut - 1] === ' ') {
                    // The cut falls right after spaces: back up to the last
                    // non-space character.
                    while (cut) {
                        cut -= 1;
                        if (row[cut - 1] !== ' ') {
                            break;
                        }
                    }
                } else {
                    add = row.substring(cut).indexOf(' ');
                    // Unless breaking inside a word is allowed, extend the cut
                    // to the end of the current word.
                    if (!wordBreak) {
                        if (add !== -1) {
                            cut += add;
                        } else {
                            cut = row.length;
                        }
                    }
                }

                row = row.substring(0, cut) + suffix;

                if (moreLink) {
                    row += '<a href="' + moreLink + '" style="display:inline">»</a>';
                }

                sum = limit;
                more = true;
            } else {
                sum += rowCut.length;
            }
        } else if (!preserveTags) {
            row = '';
        } else if (sum >= limit) {
            // Past the cut: drop elements opened from here on, but keep the
            // closing tags of elements that were opened before the cut.
            tagMatch = row.match(/[a-zA-Z]+/);
            tagName = tagMatch ? tagMatch[0] : '';

            if (tagName) {
                if (row.substring(0, 2) !== '</') {
                    tagStack.push(tagName);
                    row = '';
                } else {
                    // Discard dropped tags that were never closed.
                    while (tagStack[tagStack.length - 1] !== tagName && tagStack.length) {
                        tagStack.pop();
                    }

                    // A match on the stack means this closes a dropped element.
                    if (tagStack.length) {
                        row = '';
                    }

                    tagStack.pop();
                }
            } else {
                row = '';
            }
        }

        arr[i] = row;
    }

    return {
        html: arr.join("\n").replace(/\n/g, ""),
        more: more
    };
}
// CommonJS export: only takes effect where a `module` object with an
// `exports` property is available.
if (typeof module !== 'undefined') {
    if (module.exports) {
        module.exports = trimHtml;
    }
}
usage
// Usage example. The sample markup is built by concatenation because a quoted
// string literal cannot span several source lines.
var html = '<div><p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, ' +
    'sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. </p><p>Ut ' +
    'enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ' +
    'ex ea commodo consequat. </p><p>Duis aute irure dolor in reprehenderit in ' +
    'voluptate velit esse cillum dolore eu fugiat nulla pariatur. </p><p>Excepteur ' +
    'sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit ' +
    'anim id est laborum.</p></div>';
var trim = trimHtml(html, { limit: 200 });
// **returns object**
// {
//     html: '<div><p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, ' +
//         'sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. </p><p>Ut ' +
//         'enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut...' +
//         '</p></div>',
//     more: true // indicates if limit is reached
// }
#2
I solved the problem; here is the code in C#:
我解决了这个问题,所以这里是c#中的代码;
/// <summary>
/// Cuts <paramref name="s"/> to at most <paramref name="limit"/> characters, ending at the
/// last position inside that range where every tag opened so far has been closed again.
/// </summary>
/// <param name="s">Text that may contain HTML tags.</param>
/// <param name="limit">Maximum number of characters to keep.</param>
/// <returns>
/// <paramref name="s"/> unchanged when it already fits; otherwise the longest prefix within
/// the limit that ends outside of any element, or an empty string when there is none.
/// </returns>
static string CutIt(string s, int limit)
{
    // The whole string already fits; nothing to cut.
    if (s.Length <= limit) return s;

    // Last index at which no tag is open; -1 means no safe cut has been seen yet.
    int okIndex = -1;
    bool inClosingTag = false;
    int numOpenTags = 0;

    for (int i = 0; i < limit; i++)
    {
        if (s[i] == '<')
        {
            // s.Length > limit > i here, so s[i + 1] is always in range.
            if (s[i + 1] == '/')
            {
                inClosingTag = true;
            }
            else
            {
                numOpenTags++;
            }
        }
        if (s[i] == '>')
        {
            bool selfClosing = i > 0 && s[i - 1] == '/';
            // Never go below zero, so a stray closing tag cannot block all later cuts.
            if ((selfClosing || inClosingTag) && numOpenTags > 0)
            {
                numOpenTags--;
            }
            // Reset for the next tag; otherwise every later '>' would count as a closing tag.
            inClosingTag = false;
        }
        // Do not record a cut position in the middle of a closing tag.
        if (numOpenTags == 0 && !inClosingTag) okIndex = i;
    }
    return s.Substring(0, okIndex + 1);
}
#3
This might be overkill, but try looking up AWK; it can do this kind of thing pretty easily, since it is built around processing text.
这可能是矫枉过正,但尝试查找AWK,它可以很容易地做这种事情,因为它以处理文本为中心。
You can also write a custom parsing script like
您也可以编写自定义解析脚本
# Copy characters from s into result until slice_limit visible (non-tag)
# characters have been copied. Characters inside <...> are copied but not counted.
# Note: elements opened before the cut are not closed by this loop.
s = 'Visit <a href="www.htz.hr">Croatia</a> this summer.'
result = ""
slice_limit = 9
i = 0          # visible characters copied so far
j = 0          # read position in s
in_tag = false
while i < slice_limit and j < s.size do
  in_tag = true if s[j] == "<"
  result += s[j]
  if in_tag
    # The closing ">" still belongs to the tag, so it is not counted.
    in_tag = false if s[j] == ">"
  else
    i += 1
  end
  j += 1
end
... or something like that (haven't tested, but it gives you the idea).
......或类似的东西(尚未测试,但它给你的想法)。
EDIT: You will also have to add something to detect if the tag is closed or not (just add a flag like in_tag and mix it with some regular expression and it should work) Hope that helps
编辑:您还必须添加一些东西来检测标签是否关闭(只需添加一个像in_tag的标志,并将其与一些正则表达式混合,它应该工作)希望有帮助
EDIT2: if you gave the language you want to use, that could be helpful. javascript?
EDIT2:如果您提供了想要使用的语言,那可能会有所帮助。 JavaScript的?
#4
/// <summary>
/// Cuts <paramref name="s"/> to at most <paramref name="limit"/> characters. If the cut
/// lands inside a tag (a '<' with no '>' after it), the partial tag is removed as well.
/// Surrounding whitespace is trimmed from the result.
/// </summary>
/// <param name="s">Text that may contain HTML tags.</param>
/// <param name="limit">Maximum number of characters to keep before trimming.</param>
/// <returns>The shortened, trimmed string.</returns>
static string CutIt(string s, int limit)
{
    // Substring throws when asked for more characters than the string has,
    // so only cut when the string is actually longer than the limit.
    if (limit < s.Length)
    {
        s = s.Substring(0, limit);
    }

    int openMark = s.LastIndexOf('<');
    if (openMark != -1)
    {
        int closeMark = s.LastIndexOf('>');
        // The last '<' has no matching '>' after it: the cut split a tag.
        if (openMark > closeMark)
        {
            s = s.Substring(0, openMark);
        }
    }
    return s.Trim();
}
// Demo entry point: shortens the sample sentence from the question to 9 characters.
public static void Main()
{
    string sample = "Visit <a href=\"www.htz.hr\">Croatia</a> this summer.";
    string shortened = CutIt(sample, 9);
    Console.WriteLine(shortened); // prints "Visit"
}
#5
When I encountered this problem (for an RSS feed), I just called strip_tags before cutting my string.
当我遇到这样的问题时(对于RSS feed)我在剪切字符串之前就调用了strip_tags。
#6
In javascript, you can use the textContent property of DOM elements to obtain this.
在javascript中,您可以使用DOM元素的textContent属性来获取它。
HTML
<p id='mytext'>Hey <a href="#">Visit Croatia</a> today</p>
Javascript
// Look up the paragraph and log its text content, i.e. the text without markup.
var paragraph = document.getElementById("mytext");
var plainText = paragraph.textContent;
console.log( plainText );
//alert( plainText ); // if you don't have firebug.
#1
/**
 * Trim an HTML string to roughly `limit` visible characters without cutting
 * through a tag and without keeping elements that are opened after the cut.
 *
 * The markup is split into alternating tag / text pieces. Text pieces are
 * counted (a run of spaces counts as one character) until the limit is
 * reached; the piece that crosses the limit is shortened and `suffix` is
 * appended to it. Everything after that point is dropped, except closing tags
 * that belong to elements opened before the cut.
 *
 * @param {string} html - Markup to shorten. Line breaks in it are removed from the result.
 * @param {Object} [options]
 * @param {number} [options.limit=100] - Character budget for the visible text.
 *     Falsy values (including 0) fall back to 100.
 * @param {boolean} [options.preserveTags=true] - When false, every tag is removed.
 * @param {boolean} [options.wordBreak=false] - When true the cut may fall inside
 *     a word; otherwise it is moved forward to the end of that word.
 * @param {string} [options.suffix='...'] - Text appended at the cut. An empty
 *     string falls back to '...'.
 * @param {string} [options.moreLink=''] - When set, a link with this href is
 *     appended after the suffix. The value is inserted verbatim (not escaped).
 * @returns {{html: string, more: boolean}} The shortened markup and a flag
 *     telling whether the limit was reached.
 */
function trimHtml(html, options) {
    options = options || {};

    var limit = options.limit || 100,
        preserveTags = (typeof options.preserveTags !== 'undefined') ? options.preserveTags : true,
        wordBreak = (typeof options.wordBreak !== 'undefined') ? options.wordBreak : false,
        suffix = options.suffix || '...',
        moreLink = options.moreLink || '';

    // Put every tag on its own line so that each array entry is either a
    // complete tag or a run of text.
    var arr = html.replace(/</g, "\n<")
        .replace(/>/g, ">\n")
        .replace(/\n\n/g, "\n")
        .replace(/^\n/g, "")
        .replace(/\n$/g, "")
        .split("\n");

    // rowCut is declared here; assigning it undeclared leaked a global and
    // threw a ReferenceError in strict mode / ES modules.
    var sum = 0,
        row, rowCut, cut, add,
        tagMatch,
        tagName,
        tagStack = [],
        more = false;

    for (var i = 0; i < arr.length; i++) {
        row = arr[i];
        // count multiple spaces as one character
        rowCut = row.replace(/[ ]+/g, ' ');

        if (!row.length) {
            continue;
        }

        if (row[0] !== "<") {
            if (sum >= limit) {
                // Text after the cut is dropped.
                row = "";
            } else if ((sum + rowCut.length) >= limit) {
                cut = limit - sum;

                if (row[cut - 1] === ' ') {
                    // The cut falls right after spaces: back up to the last
                    // non-space character.
                    while (cut) {
                        cut -= 1;
                        if (row[cut - 1] !== ' ') {
                            break;
                        }
                    }
                } else {
                    add = row.substring(cut).indexOf(' ');
                    // Unless breaking inside a word is allowed, extend the cut
                    // to the end of the current word.
                    if (!wordBreak) {
                        if (add !== -1) {
                            cut += add;
                        } else {
                            cut = row.length;
                        }
                    }
                }

                row = row.substring(0, cut) + suffix;

                if (moreLink) {
                    row += '<a href="' + moreLink + '" style="display:inline">»</a>';
                }

                sum = limit;
                more = true;
            } else {
                sum += rowCut.length;
            }
        } else if (!preserveTags) {
            row = '';
        } else if (sum >= limit) {
            // Past the cut: drop elements opened from here on, but keep the
            // closing tags of elements that were opened before the cut.
            tagMatch = row.match(/[a-zA-Z]+/);
            tagName = tagMatch ? tagMatch[0] : '';

            if (tagName) {
                if (row.substring(0, 2) !== '</') {
                    tagStack.push(tagName);
                    row = '';
                } else {
                    // Discard dropped tags that were never closed.
                    while (tagStack[tagStack.length - 1] !== tagName && tagStack.length) {
                        tagStack.pop();
                    }

                    // A match on the stack means this closes a dropped element.
                    if (tagStack.length) {
                        row = '';
                    }

                    tagStack.pop();
                }
            } else {
                row = '';
            }
        }

        arr[i] = row;
    }

    return {
        html: arr.join("\n").replace(/\n/g, ""),
        more: more
    };
}
// CommonJS export: only takes effect where a `module` object with an
// `exports` property is available.
if (typeof module !== 'undefined') {
    if (module.exports) {
        module.exports = trimHtml;
    }
}
usage
// Usage example. The sample markup is built by concatenation because a quoted
// string literal cannot span several source lines.
var html = '<div><p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, ' +
    'sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. </p><p>Ut ' +
    'enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ' +
    'ex ea commodo consequat. </p><p>Duis aute irure dolor in reprehenderit in ' +
    'voluptate velit esse cillum dolore eu fugiat nulla pariatur. </p><p>Excepteur ' +
    'sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit ' +
    'anim id est laborum.</p></div>';
var trim = trimHtml(html, { limit: 200 });
// **returns object**
// {
//     html: '<div><p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, ' +
//         'sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. </p><p>Ut ' +
//         'enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut...' +
//         '</p></div>',
//     more: true // indicates if limit is reached
// }
#2
I solved the problem; here is the code in C#:
我解决了这个问题,所以这里是c#中的代码;
/// <summary>
/// Cuts <paramref name="s"/> to at most <paramref name="limit"/> characters, ending at the
/// last position inside that range where every tag opened so far has been closed again.
/// </summary>
/// <param name="s">Text that may contain HTML tags.</param>
/// <param name="limit">Maximum number of characters to keep.</param>
/// <returns>
/// <paramref name="s"/> unchanged when it already fits; otherwise the longest prefix within
/// the limit that ends outside of any element, or an empty string when there is none.
/// </returns>
static string CutIt(string s, int limit)
{
    // The whole string already fits; nothing to cut.
    if (s.Length <= limit) return s;

    // Last index at which no tag is open; -1 means no safe cut has been seen yet.
    int okIndex = -1;
    bool inClosingTag = false;
    int numOpenTags = 0;

    for (int i = 0; i < limit; i++)
    {
        if (s[i] == '<')
        {
            // s.Length > limit > i here, so s[i + 1] is always in range.
            if (s[i + 1] == '/')
            {
                inClosingTag = true;
            }
            else
            {
                numOpenTags++;
            }
        }
        if (s[i] == '>')
        {
            bool selfClosing = i > 0 && s[i - 1] == '/';
            // Never go below zero, so a stray closing tag cannot block all later cuts.
            if ((selfClosing || inClosingTag) && numOpenTags > 0)
            {
                numOpenTags--;
            }
            // Reset for the next tag; otherwise every later '>' would count as a closing tag.
            inClosingTag = false;
        }
        // Do not record a cut position in the middle of a closing tag.
        if (numOpenTags == 0 && !inClosingTag) okIndex = i;
    }
    return s.Substring(0, okIndex + 1);
}
#3
This might be overkill, but try looking up AWK; it can do this kind of thing pretty easily, since it is built around processing text.
这可能是矫枉过正,但尝试查找AWK,它可以很容易地做这种事情,因为它以处理文本为中心。
You can also write a custom parsing script like
您也可以编写自定义解析脚本
# Copy characters from s into result until slice_limit visible (non-tag)
# characters have been copied. Characters inside <...> are copied but not counted.
# Note: elements opened before the cut are not closed by this loop.
s = 'Visit <a href="www.htz.hr">Croatia</a> this summer.'
result = ""
slice_limit = 9
i = 0          # visible characters copied so far
j = 0          # read position in s
in_tag = false
while i < slice_limit and j < s.size do
  in_tag = true if s[j] == "<"
  result += s[j]
  if in_tag
    # The closing ">" still belongs to the tag, so it is not counted.
    in_tag = false if s[j] == ">"
  else
    i += 1
  end
  j += 1
end
... or something like that (haven't tested, but it gives you the idea).
......或类似的东西(尚未测试,但它给你的想法)。
EDIT: You will also have to add something to detect if the tag is closed or not (just add a flag like in_tag and mix it with some regular expression and it should work) Hope that helps
编辑:您还必须添加一些东西来检测标签是否关闭(只需添加一个像in_tag的标志,并将其与一些正则表达式混合,它应该工作)希望有帮助
EDIT2: if you gave the language you want to use, that could be helpful. javascript?
EDIT2:如果您提供了想要使用的语言,那可能会有所帮助。 JavaScript的?
#4
/// <summary>
/// Cuts <paramref name="s"/> to at most <paramref name="limit"/> characters. If the cut
/// lands inside a tag (a '<' with no '>' after it), the partial tag is removed as well.
/// Surrounding whitespace is trimmed from the result.
/// </summary>
/// <param name="s">Text that may contain HTML tags.</param>
/// <param name="limit">Maximum number of characters to keep before trimming.</param>
/// <returns>The shortened, trimmed string.</returns>
static string CutIt(string s, int limit)
{
    // Substring throws when asked for more characters than the string has,
    // so only cut when the string is actually longer than the limit.
    if (limit < s.Length)
    {
        s = s.Substring(0, limit);
    }

    int openMark = s.LastIndexOf('<');
    if (openMark != -1)
    {
        int closeMark = s.LastIndexOf('>');
        // The last '<' has no matching '>' after it: the cut split a tag.
        if (openMark > closeMark)
        {
            s = s.Substring(0, openMark);
        }
    }
    return s.Trim();
}
// Demo entry point: shortens the sample sentence from the question to 9 characters.
public static void Main()
{
    string sample = "Visit <a href=\"www.htz.hr\">Croatia</a> this summer.";
    string shortened = CutIt(sample, 9);
    Console.WriteLine(shortened); // prints "Visit"
}
#5
When I encountered this problem (for an RSS feed), I just called strip_tags before cutting my string.
当我遇到这样的问题时(对于RSS feed)我在剪切字符串之前就调用了strip_tags。
#6
In javascript, you can use the textContent property of DOM elements to obtain this.
在javascript中,您可以使用DOM元素的textContent属性来获取它。
HTML
<p id='mytext'>Hey <a href="#">Visit Croatia</a> today</p>
Javascript
// Look up the paragraph and log its text content, i.e. the text without markup.
var paragraph = document.getElementById("mytext");
var plainText = paragraph.textContent;
console.log( plainText );
//alert( plainText ); // if you don't have firebug.
#7
Here is a JavaScript solution: trimHtml
这是JavaScript解决方案:trimHtml