I'm trying to convert HTML Codes like the &#XXXX; (where XXXX is a number) to plain text using classic ASP (VBScript).

我正在尝试使用经典ASP(VBScript)将类似 &#XXXX; 的HTML代码(其中XXXX是一个数字)转换为纯文本。

I'm adding the text to an email which is in plain text format and if I add them as HTML Codes, it just displays the code and doesn't convert them.


One fix would be to change the email to be HTML which does fix that problem but then causes other problems for my email which I won't go into.


Is there a built-in function or a custom function I can use to convert these HTML Codes to plain text?


3 个解决方案



What you need is HTML Decode, though unfortunately ASP doesn't include one.


This function, found on ASP Nut, and modified heavily by me, should do what you need. I tested it as vbscript running on my local computer and it seemed to work well, even with Unicode symbols in the 1000+ range.

这个函数最初来自ASP Nut,并经过我的大量修改,应该可以满足您的需求。我把它作为VBScript在本地计算机上运行并测试过,看起来运行良好,即使是1000以上范围的Unicode符号也没有问题。

' HTMLDecode - converts HTML character references in sText to plain text.
'
' Handles the named entities &quot; &lt; &gt; &amp; &nbsp; and every decimal
' numeric reference of the form &#NNNN;.
'
' Parameters:
'   sText - the HTML-encoded text. As in the original version the argument is
'           passed ByRef (VBScript default) and is overwritten with the
'           decoded text.
' Returns:
'   The decoded string. Null or Empty input yields "".
'
' Notes:
'   * The text is decoded in ONE left-to-right pass, so the output of one
'     replacement is never decoded a second time ("&amp;#65;" becomes the
'     literal text "&#65;", not "A").
'   * &nbsp; is turned into an ordinary space (Chr(32)), matching the
'     original behaviour.
'   * Code points above 65535 are emitted as a UTF-16 surrogate pair because
'     ChrW only accepts 16-bit values.
'   * Numeric references beyond U+10FFFF are left untouched.
Function HTMLDecode(sText)
    Dim regEx
    Dim matches
    Dim match
    Dim source       ' input coerced to a string (Null/Empty become "")
    Dim decoded      ' output, built left to right
    Dim cursor       ' 1-based index of the first source character not yet copied
    Dim token        ' entity name, or "#" followed by decimal digits
    Dim digits
    Dim codePoint
    Dim replacement

    source = "" & sText

    Set regEx = New RegExp

    With regEx
        ' One capture group: either "#digits" or one of the supported names.
        .Pattern = "&(#\d+|quot|lt|gt|amp|nbsp);"
        .Global = True
    End With

    Set matches = regEx.Execute(source)

    decoded = ""
    cursor = 1

    'Iterate over matches (they are returned in document order)
    For Each match In matches
        token = match.SubMatches(0)

        Select Case token
            Case "quot"
                replacement = Chr(34)
            Case "lt"
                replacement = Chr(60)
            Case "gt"
                replacement = Chr(62)
            Case "amp"
                replacement = Chr(38)
            Case "nbsp"
                replacement = Chr(32)
            Case Else
                ' Numeric reference. Default to leaving it as-is when the
                ' number is not a valid Unicode code point.
                replacement = match.Value
                digits = Mid(token, 2)

                ' Drop leading zeros so the length check below is meaningful.
                Do While Len(digits) > 1 And Left(digits, 1) = "0"
                    digits = Mid(digits, 2)
                Loop

                ' 7 digits is enough for U+10FFFF (1114111) and keeps CLng
                ' from overflowing on absurdly long numbers.
                If Len(digits) <= 7 Then
                    codePoint = CLng(digits)
                    If codePoint <= 65535 Then
                        replacement = ChrW(codePoint)
                    ElseIf codePoint <= 1114111 Then
                        ' Supplementary plane: build the UTF-16 surrogate pair
                        ' (55296 = &HD800, 56320 = &HDC00).
                        codePoint = codePoint - 65536
                        replacement = ChrW(55296 + (codePoint \ 1024)) & _
                                      ChrW(56320 + (codePoint Mod 1024))
                    End If
                End If
        End Select

        ' match.FirstIndex is zero-based; Mid is one-based.
        decoded = decoded & Mid(source, cursor, match.FirstIndex + 1 - cursor) & replacement
        cursor = match.FirstIndex + match.Length + 1
    Next

    ' Copy whatever follows the last match.
    decoded = decoded & Mid(source, cursor)

    sText = decoded
    HTMLDecode = decoded
End Function

Note: You'll need the VBScript scripting engine, version 5.0 or later, installed on your server to use the RegExp object.




A more exhaustive decoder, expanding on C. Ross's response

一个更完整的解码器,在C. Ross的回答基础上扩展而来

' HTMLDecode - converts HTML character references in sText to plain text.
'
' Handles &quot; &lt; &gt; &amp; &nbsp;, the Latin-1 named entities listed in
' HTMLDecode_NamedCodePoint, and every decimal numeric reference (&#NNNN;).
'
' Parameters:
'   sText - the HTML-encoded text. As in the original version the argument is
'           passed ByRef (VBScript default) and is overwritten with the
'           decoded text.
' Returns:
'   The decoded string. Null or Empty input yields "".
'
' Notes:
'   * Decoding is a single left-to-right pass, so already-decoded output is
'     never decoded again ("&amp;lt;" becomes "&lt;", not "<").
'   * Unknown entity names and numeric references beyond U+10FFFF are left
'     untouched.
'   * Characters are produced with ChrW, so the result does not depend on the
'     server's ANSI code page.
Function HTMLDecode(sText)
    Dim regEx
    Dim matches
    Dim match
    Dim source       ' input coerced to a string (Null/Empty become "")
    Dim decoded      ' output, built left to right
    Dim cursor       ' 1-based index of the first source character not yet copied
    Dim token        ' entity name, or "#" followed by decimal digits
    Dim codePoint    ' -1 when the reference cannot be decoded
    Dim replacement

    source = "" & sText

    Set regEx = New RegExp

    With regEx
        ' One capture group: "#digits" or an alphabetic entity name.
        .Pattern = "&(#\d+|[A-Za-z]+);"
        .Global = True
    End With

    Set matches = regEx.Execute(source)

    decoded = ""
    cursor = 1

    'Iterate over matches (they are returned in document order)
    For Each match In matches
        token = match.SubMatches(0)

        If Left(token, 1) = "#" Then
            codePoint = HTMLDecode_ParseDecimal(Mid(token, 2))
        Else
            codePoint = HTMLDecode_NamedCodePoint(token)
        End If

        If codePoint < 0 Then
            replacement = match.Value   ' not decodable: keep the source text
        Else
            replacement = HTMLDecode_CodePointToString(codePoint)
        End If

        ' match.FirstIndex is zero-based; Mid is one-based.
        decoded = decoded & Mid(source, cursor, match.FirstIndex + 1 - cursor) & replacement
        cursor = match.FirstIndex + match.Length + 1
    Next

    ' Copy whatever follows the last match.
    decoded = decoded & Mid(source, cursor)

    sText = decoded
    HTMLDecode = decoded
End Function

' Returns the value of a string of decimal digits as a Long, or -1 when it is
' larger than the highest Unicode code point (1114111 = U+10FFFF).
Function HTMLDecode_ParseDecimal(sDigits)
    Dim digits

    digits = sDigits

    ' Drop leading zeros so the length check below is meaningful.
    Do While Len(digits) > 1 And Left(digits, 1) = "0"
        digits = Mid(digits, 2)
    Loop

    HTMLDecode_ParseDecimal = -1

    ' The length check keeps CLng from overflowing on very long numbers.
    If Len(digits) <= 7 Then
        If CLng(digits) <= 1114111 Then
            HTMLDecode_ParseDecimal = CLng(digits)
        End If
    End If
End Function

' Returns the string for a Unicode code point. ChrW only accepts 16-bit
' values, so code points above 65535 become a UTF-16 surrogate pair
' (55296 = &HD800, 56320 = &HDC00).
Function HTMLDecode_CodePointToString(lCodePoint)
    Dim offset

    If lCodePoint <= 65535 Then
        HTMLDecode_CodePointToString = ChrW(lCodePoint)
    Else
        offset = lCodePoint - 65536
        HTMLDecode_CodePointToString = ChrW(55296 + (offset \ 1024)) & _
                                       ChrW(56320 + (offset Mod 1024))
    End If
End Function

' Maps a (case-sensitive) HTML entity name to its code point, or -1 when the
' name is not supported. &nbsp; deliberately maps to an ordinary space (32)
' because the result is meant for plain-text output.
Function HTMLDecode_NamedCodePoint(sName)
    Dim code

    Select Case sName
        Case "quot"   : code = 34
        Case "lt"     : code = 60
        Case "gt"     : code = 62
        Case "amp"    : code = 38
        Case "nbsp"   : code = 32

        Case "iexcl"  : code = 161
        Case "pound"  : code = 163
        Case "yen"    : code = 165
        Case "copy"   : code = 169   ' was 168 (diaeresis) in the original
        Case "laquo"  : code = 171
        Case "raquo"  : code = 187
        Case "iquest" : code = 191
        Case "Agrave" : code = 192
        Case "Aacute" : code = 193
        Case "Acirc"  : code = 194
        Case "Atilde" : code = 195
        Case "Auml"   : code = 196
        Case "Aring"  : code = 197
        Case "AElig"  : code = 198
        Case "Ccedil" : code = 199
        Case "Egrave" : code = 200
        Case "Eacute" : code = 201
        Case "Ecirc"  : code = 202
        Case "Euml"   : code = 203
        Case "Igrave" : code = 204
        Case "Iacute" : code = 205
        Case "Icirc"  : code = 206
        Case "Iuml"   : code = 207
        Case "ETH"    : code = 208   ' missing from the original table
        Case "Ntilde" : code = 209
        Case "Ograve" : code = 210
        Case "Oacute" : code = 211
        Case "Ocirc"  : code = 212
        Case "Otilde" : code = 213
        Case "Ouml"   : code = 214
        Case "times"  : code = 215
        Case "Oslash" : code = 216
        Case "Ugrave" : code = 217
        Case "Uacute" : code = 218
        Case "Ucirc"  : code = 219
        Case "Uuml"   : code = 220
        Case "Yacute" : code = 221
        Case "THORN"  : code = 222
        Case "szlig"  : code = 223
        Case "agrave" : code = 224
        Case "aacute" : code = 225
        Case "acirc"  : code = 226
        Case "atilde" : code = 227
        Case "auml"   : code = 228
        Case "aring"  : code = 229
        Case "aelig"  : code = 230
        Case "ccedil" : code = 231
        Case "egrave" : code = 232
        Case "eacute" : code = 233
        Case "ecirc"  : code = 234
        Case "euml"   : code = 235
        Case "igrave" : code = 236
        Case "iacute" : code = 237
        Case "icirc"  : code = 238
        Case "iuml"   : code = 239
        Case "eth"    : code = 240
        Case "ntilde" : code = 241
        Case "ograve" : code = 242
        Case "oacute" : code = 243
        Case "ocirc"  : code = 244
        Case "otilde" : code = 245
        Case "ouml"   : code = 246
        Case "divide" : code = 247
        Case "oslash" : code = 248
        Case "ugrave" : code = 249
        Case "uacute" : code = 250
        Case "ucirc"  : code = 251
        Case "uuml"   : code = 252
        Case "yacute" : code = 253
        Case "thorn"  : code = 254
        Case "yuml"   : code = 255

        Case Else     : code = -1
    End Select

    HTMLDecode_NamedCodePoint = code
End Function



I made this based on C.Ross's answer above and some code from a fb_app.inc someone else made. It should do the trick.


    Option Explicit

    ' Demo driver: build the decoder, convert a sample string that contains
    ' two emoji written as numeric character references, and write the
    ' converted text to the response.
    Dim entityDecoder
    Dim convertedMarkup

    Set entityDecoder = New HtmlEntityToUnicode
    convertedMarkup = entityDecoder.HtmlDecode("<br/><br/>hi  &#128154; green heart! purple heart &#128156; ! ")
    Response.Write convertedMarkup

    ' Release the helper once the output has been written.
    Set entityDecoder = Nothing

    ' HtmlEntityToUnicode - rewrites HTML character references as text.
    '
    ' HTMLDecode replaces the five basic named entities with their characters
    ' and rewrites every decimal numeric reference (&#NNNN;) as "\U" followed
    ' by the code point as eight upper-case hex digits, e.g.
    ' "&#128154;" -> "\U0001F49A".
    Class HtmlEntityToUnicode
        Dim BITS_TO_A_BYTE
        Dim BYTES_TO_A_WORD
        Dim BITS_TO_A_WORD
        Dim m_lOnBits(30)   ' m_lOnBits(n)  = 2^(n+1) - 1 (mask with the low n+1 bits set)
        Dim m_l2Power(30)   ' m_l2Power(n)  = 2^n

        ' Fills in the constants and the two lookup tables used by RShift and
        ' WordToHex.
        Sub Class_Initialize()
            Dim i

            BITS_TO_A_BYTE = 8
            BYTES_TO_A_WORD = 4
            BITS_TO_A_WORD = 32

            ' Same values as the hand-written tables: 1, 2, 4, ... 2^30 and
            ' 1, 3, 7, ... 2^31 - 1. The mask is derived without ever forming
            ' 2^31, which would overflow a Long.
            m_l2Power(0) = CLng(1)
            m_lOnBits(0) = CLng(1)
            For i = 1 To 30
                m_l2Power(i) = m_l2Power(i - 1) * 2
                m_lOnBits(i) = m_lOnBits(i - 1) + m_l2Power(i)
            Next
        End Sub

        ' Decodes sText as described in the class header.
        '
        ' Parameters:
        '   sText - HTML-encoded text. Passed ByRef (VBScript default) and, as
        '           in the original version, overwritten with the result.
        ' Returns:
        '   The converted string. Null or Empty input yields "".
        '
        ' The text is processed in one left-to-right pass so that the output
        ' of one replacement is never decoded again ("&amp;#65;" yields the
        ' literal text "&#65;"). Numeric references too large for a Long are
        ' left untouched.
        Public Function HTMLDecode(sText)
            Dim regEx
            Dim matches
            Dim match
            Dim source       ' input coerced to a string
            Dim decoded      ' output, built left to right
            Dim cursor       ' 1-based index of the first source character not yet copied
            Dim token        ' entity name, or "#" followed by decimal digits
            Dim digits
            Dim replacement

            source = "" & sText

            Set regEx = New RegExp

            With regEx
                ' One capture group: "#digits" or one of the supported names.
                .Pattern = "&(#\d+|quot|lt|gt|amp|nbsp);"
                .Global = True
            End With

            Set matches = regEx.Execute(source)

            decoded = ""
            cursor = 1

            'Iterate over matches (they are returned in document order)
            For Each match In matches
                token = match.SubMatches(0)

                Select Case token
                    Case "quot"
                        replacement = Chr(34)
                    Case "lt"
                        replacement = Chr(60)
                    Case "gt"
                        replacement = Chr(62)
                    Case "amp"
                        replacement = Chr(38)
                    Case "nbsp"
                        replacement = Chr(32)
                    Case Else
                        ' Numeric reference: emit \U + 8 hex digits.
                        replacement = match.Value
                        digits = Mid(token, 2)

                        ' Drop leading zeros so the length check is meaningful.
                        Do While Len(digits) > 1 And Left(digits, 1) = "0"
                            digits = Mid(digits, 2)
                        Loop

                        ' Nine digits always fit in a Long, so CLng cannot overflow.
                        If Len(digits) <= 9 Then
                            replacement = "\U" & WordToHex(CLng(digits))
                        End If
                End Select

                ' match.FirstIndex is zero-based; Mid is one-based.
                decoded = decoded & Mid(source, cursor, match.FirstIndex + 1 - cursor) & replacement
                cursor = match.FirstIndex + match.Length + 1
            Next

            ' Copy whatever follows the last match.
            decoded = decoded & Mid(source, cursor)

            sText = decoded
            HTMLDecode = decoded
        End Function

        ' Formats a 32-bit value as eight hex digits, most significant byte
        ' first.
        Private Function WordToHex(lValue)
            Dim lByte
            Dim lCount
            Dim sHex

            sHex = ""
            For lCount = 0 To 3
                ' Extract byte number lCount (0 = least significant) ...
                lByte = RShift(lValue, lCount * BITS_TO_A_BYTE) And m_lOnBits(BITS_TO_A_BYTE - 1)
                ' ... and prepend it, so the final string is big-endian.
                sHex = Right("0" & Hex(lByte), 2) & sHex
            Next

            WordToHex = sHex
        End Function

        ' Logical (unsigned) right shift of a 32-bit Long by iShiftBits bits.
        ' Raises error 6 (Overflow) when iShiftBits is outside 0..31.
        Private Function RShift(lValue, iShiftBits)
            If iShiftBits = 0 Then
                RShift = lValue
                Exit Function
            ElseIf iShiftBits = 31 Then
                ' Only the sign bit survives a 31-bit shift.
                If lValue And &H80000000 Then
                    RShift = 1
                Else
                    RShift = 0
                End If
                Exit Function
            ElseIf iShiftBits < 0 Or iShiftBits > 31 Then
                Err.Raise 6
            End If

            ' Shift the lower 31 bits with integer division ...
            RShift = (lValue And &H7FFFFFFE) \ m_l2Power(iShiftBits)

            ' ... then put the sign bit back at its shifted position.
            If (lValue And &H80000000) Then
                RShift = (RShift Or (&H40000000 \ m_l2Power(iShiftBits - 1)))
            End If
        End Function

    End Class




