question

HopefulDear-7150 avatar image
1 Vote"
HopefulDear-7150 asked emizhang-msft edited

(On MacOS) How can I format (or decode) HTML entities and tags in excel using vba?

Using VBA I have been able to attain and place strings like the below within an excel cell. But they currently still contain both HTML tags and entities. How can I show these in their appropriately formatted form, within an excel cell?

<sup>3</sup>&#8260;<sub>10</sub> is larger than <sup>3</sup>&#8260;<sub>100</sub>, <sup>33</sup>&#8260;<sub>1,000</sub> or <sup>3</sup>&#8260;<sub>1,000</sub>

This is what I'm trying to achieve:

gdvIp.png


Image originally from link.

I have attempted many of the approaches already suggested within different stack overflow threads, but as they all focus toward the Windows O.S most make use of libraries and browsers which the Mac version of excel simply doesn't have access to.



Errors such:

Run-time error '429:

ActiveX component can't create object


When attempting this approach:

 Sub Sample()
     Dim Ie As Object
     Set Ie = CreateObject("InternetExplorer.Application")
     With Ie
         .Visible = False
         .Navigate "about:blank"
         .document.body.InnerHTML = Sheets("Sheet1").Range("I2").Value
              'update to the cell that contains HTML you want converted
         .ExecWB 17, 0
              'Select all contents in browser
         .ExecWB 12, 2
              'Copy them
         ActiveSheet.Paste Destination:=Sheets("Sheet1").Range("J2")
              'update to cell you want converted HTML pasted in
         .Quit
     End With
 End Sub


Or this approach:


 Function HtmlDecode(str As String) As String
     Dim dom As Object
    
     Set dom = CreateObject("htmlfile")
     dom.Open
     dom.Write str
     dom.Close
     HtmlDecode = dom.Body.innerText
 End Function


Compiler Error: User-Defined Type not defined

When attempting this approach:

 Option Explicit
    
 Private Const ERROR_UNEXPECTED_ENTITY_COUNT As Long = vbObjectError Or 1
 Private Const ERROR_UNEXPECTED_ENTITY_DUPLICATE As Long = vbObjectError Or 2
 Private Const ERROR_UNEXPECTED_ENTITY_VALUE As Long = vbObjectError Or 3
 Private Const ERROR_UNEXPECTED_ENTITY_FORMAT As Long = vbObjectError Or 4
    
 Private Function CreateEntityDictionary() As Scripting.Dictionary
    
   Const PROC_NAME As String = "getEntityDictionary"
    
   Const ENTITY_NAMES = _
     "quot,amp,apos,lt,gt,nbsp,iexcl,cent,pound,curren,yen,brvbar,sect,uml,copy,ordf," & _
     "laquo,not,shy,reg,macr,deg,plusmn,sup2,sup3,acute,micro,para,middot,cedil,sup1,ordm," & _
     "raquo,frac14,frac12,frac34,iquest,Agrave,Aacute,Acirc,Atilde,Auml,Aring,AElig,Ccedil,Egrave,Eacute,Ecirc," & _
     "Euml,Igrave,Iacute,Icirc,Iuml,ETH,Ntilde,Ograve,Oacute,Ocirc,Otilde,Ouml,times,Oslash,Ugrave,Uacute," & _
     "Ucirc,Uuml,Yacute,THORN,szlig,agrave,aacute,acirc,atilde,auml,aring,aelig,ccedil,egrave,eacute,ecirc," & _
     "euml,igrave,iacute,icirc,iuml,eth,ntilde,ograve,oacute,ocirc,otilde,ouml,divide,oslash,ugrave,uacute," & _
     "ucirc,uuml,yacute,thorn,yuml,OElig,oelig,Scaron,scaron,Yuml,fnof,circ,tilde,Alpha,Beta,Gamma," & _
     "Delta,Epsilon,Zeta,Eta,Theta,Iota,Kappa,Lambda,Mu,Nu,Xi,Omicron,Pi,Rho,Sigma,Tau," & _
     "Upsilon,Phi,Chi,Psi,Omega,alpha,beta,gamma,delta,epsilon,zeta,eta,theta,iota,kappa,lambda," & _
     "mu,nu,xi,omicron,pi,rho,sigmaf,sigma,tau,upsilon,phi,chi,psi,omega,thetasym,upsih," & _
     "piv,ensp,emsp,thinsp,zwnj,zwj,lrm,rlm,ndash,mdash,lsquo,rsquo,sbquo,ldquo,rdquo,bdquo," & _
     "dagger,Dagger,bull,hellip,permil,prime,Prime,lsaquo,rsaquo,oline,frasl,euro,image,weierp,real,trade," & _
     "alefsym,larr,uarr,rarr,darr,harr,crarr,lArr,uArr,rArr,dArr,hArr,forall,part,exist,empty," & _
     "nabla,isin,notin,ni,prod,sum,minus,lowast,radic,prop,infin,ang,and,or,cap,cup," & _
     "int,there4,sim,cong,asymp,ne,equiv,le,ge,sub,sup,nsub,sube,supe,oplus,otimes," & _
     "perp,sdot,lceil,rceil,lfloor,rfloor,lang,rang,loz,spades,clubs,hearts,diams"
    
   Const ENTITY_VALUES = _
     "34,38,39,60,62,160,161,162,163,164,165,166,167,168,169,170," & _
     "171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186," & _
     "187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202," & _
     "203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218," & _
     "219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234," & _
     "235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250," & _
     "251,252,253,254,255,338,339,352,353,376,402,710,732,913,914,915," & _
     "916,917,918,919,920,921,922,923,924,925,926,927,928,929,931,932," & _
     "933,934,935,936,937,945,946,947,948,949,950,951,952,953,954,955," & _
     "956,957,958,959,960,961,962,963,964,965,966,967,968,969,977,978," & _
     "982,8194,8195,8201,8204,8205,8206,8207,8211,8212,8216,8217,8218,8220,8221,8222," & _
     "8224,8225,8226,8230,8240,8242,8243,8249,8250,8254,8260,8364,8465,8472,8476,8482," & _
     "8501,8592,8593,8594,8595,8596,8629,8656,8657,8658,8659,8660,8704,8706,8707,8709," & _
     "8711,8712,8713,8715,8719,8721,8722,8727,8730,8733,8734,8736,8743,8744,8745,8746," & _
     "8747,8756,8764,8773,8776,8800,8801,8804,8805,8834,8835,8836,8838,8839,8853,8855," & _
     "8869,8901,8968,8969,8970,8971,9001,9002,9674,9824,9827,9829,9830"
    
   Const ENTITY_DELIMITER As String = ","
    
   Set CreateEntityDictionary = New Scripting.Dictionary
   'Entity names must be case sensitive
   CreateEntityDictionary.CompareMode = BinaryCompare
    
   Dim entityNames() As String
   entityNames = Split(ENTITY_NAMES, ENTITY_DELIMITER)
    
   Dim entityValues() As String
   entityValues = Split(ENTITY_VALUES, ENTITY_DELIMITER)
    
   If UBound(entityNames) = UBound(entityValues) Then
     With CreateEntityDictionary
       Dim entityCounter As Long
       For entityCounter = LBound(entityNames) To UBound(entityNames)
         If Not .Exists(entityNames(entityCounter)) Then
           If IsNumeric(entityValues(entityCounter)) Then
             .Add entityNames(entityCounter), CLng(entityValues(entityCounter))
           Else
             Err.Raise ERROR_UNEXPECTED_ENTITY_VALUE, PROC_NAME, "Unexpected entity value: " & entityValues(entityCounter)
           End If
         Else
           Err.Raise ERROR_UNEXPECTED_ENTITY_DUPLICATE, PROC_NAME, "Unexpected duplicate entity name: " & entityNames(entityCounter)
         End If
       Next entityCounter
     End With
   Else
     Err.Raise ERROR_UNEXPECTED_ENTITY_COUNT, PROC_NAME, "Unexpected number of entity names/values"
   End If
    
 End Function
    
 Private Function ExtractCharacterReferences(ByRef sourceText As String) As VBScript_RegExp_55.MatchCollection
    
   Const REFERENCE_PATTERN As String = "&(#{0,1}[x]{0,1})(\w{1,8});"
    
   With New VBScript_RegExp_55.RegExp
     .Global = True
     .Pattern = REFERENCE_PATTERN
     Set ExtractCharacterReferences = .Execute(sourceText)
   End With
    
 End Function

The closest I have gotten is finding this approach, to find and replace a list of html entities with their corresponding symbols:

 Public Function HTMLEntititesDecode(p_strText As String) As String
    
 Dim strTemp As String
     strTemp = p_strText
 strTemp = Replace(strTemp, "&quot;", """")
 strTemp = Replace(strTemp, "&amp;", "&")
 strTemp = Replace(strTemp, "&apos;", "'")
 strTemp = Replace(strTemp, "&lt;", "<")
 strTemp = Replace(strTemp, "&gt;", ">")
 strTemp = Replace(strTemp, "&nbsp;", " ")
 strTemp = Replace(strTemp, "&iexcl;", "¡")
 strTemp = Replace(strTemp, "&cent;", "¢")
 strTemp = Replace(strTemp, "&pound;", "£")
 strTemp = Replace(strTemp, "&curren;", "?")
 strTemp = Replace(strTemp, "&yen;", "¥")
 strTemp = Replace(strTemp, "&brvbar;", "?")
 strTemp = Replace(strTemp, "&sect;", "§")
 strTemp = Replace(strTemp, "&uml;", "¨")
 strTemp = Replace(strTemp, "&copy;", "©")
 strTemp = Replace(strTemp, "&ordf;", "ª")
 strTemp = Replace(strTemp, "&laquo;", "«")
 strTemp = Replace(strTemp, "&not;", "¬")
 strTemp = Replace(strTemp, "*", "")
 strTemp = Replace(strTemp, "&reg;", "®")
 strTemp = Replace(strTemp, "&macr;", "¯")
 strTemp = Replace(strTemp, "&deg;", "°")
 strTemp = Replace(strTemp, "&plusmn;", "±")
 strTemp = Replace(strTemp, "&sup2;", "?")
 strTemp = Replace(strTemp, "&sup3;", "?")
 strTemp = Replace(strTemp, "&acute;", "´")
 strTemp = Replace(strTemp, "&micro;", "µ")
 strTemp = Replace(strTemp, "&para;", "¶")
 strTemp = Replace(strTemp, "&middot;", "·")
 strTemp = Replace(strTemp, "&cedil;", "¸")
 strTemp = Replace(strTemp, "&sup1;", "?")
 strTemp = Replace(strTemp, "&ordm;", "º")
 strTemp = Replace(strTemp, "&raquo;", "»")
 strTemp = Replace(strTemp, "&frac14;", "?")
 strTemp = Replace(strTemp, "&frac12;", "?")
 strTemp = Replace(strTemp, "&frac34;", "?")
 strTemp = Replace(strTemp, "&iquest;", "¿")
 strTemp = Replace(strTemp, "&Agrave;", "À")
 strTemp = Replace(strTemp, "&Aacute;", "Á")
 strTemp = Replace(strTemp, "&Acirc;", "Â")
 strTemp = Replace(strTemp, "&Atilde;", "Ã")
 strTemp = Replace(strTemp, "&Auml;", "Ä")
 strTemp = Replace(strTemp, "&Aring;", "Å")
 strTemp = Replace(strTemp, "&AElig;", "Æ")
 strTemp = Replace(strTemp, "&Ccedil;", "Ç")
 strTemp = Replace(strTemp, "&Egrave;", "È")
 strTemp = Replace(strTemp, "&Eacute;", "É")
 strTemp = Replace(strTemp, "&Ecirc;", "Ê")
 strTemp = Replace(strTemp, "&Euml;", "Ë")
 strTemp = Replace(strTemp, "&Igrave;", "Ì")
 strTemp = Replace(strTemp, "&Iacute;", "Í")
 strTemp = Replace(strTemp, "&Icirc;", "Î")
 strTemp = Replace(strTemp, "&Iuml;", "Ï")
 strTemp = Replace(strTemp, "&ETH;", "?")
 strTemp = Replace(strTemp, "&Ntilde;", "Ñ")
 strTemp = Replace(strTemp, "&Ograve;", "Ò")
 strTemp = Replace(strTemp, "&Oacute;", "Ó")
 strTemp = Replace(strTemp, "&Ocirc;", "Ô")
 strTemp = Replace(strTemp, "&Otilde;", "Õ")
 strTemp = Replace(strTemp, "&Ouml;", "Ö")
 strTemp = Replace(strTemp, "&times;", "?")
 strTemp = Replace(strTemp, "&Oslash;", "Ø")
 strTemp = Replace(strTemp, "&Ugrave;", "Ù")
 strTemp = Replace(strTemp, "&Uacute;", "Ú")
 strTemp = Replace(strTemp, "&Ucirc;", "Û")
 strTemp = Replace(strTemp, "&Uuml;", "Ü")
 strTemp = Replace(strTemp, "&Yacute;", "?")
 strTemp = Replace(strTemp, "&THORN;", "?")
 strTemp = Replace(strTemp, "&szlig;", "ß")
 strTemp = Replace(strTemp, "&agrave;", "à")
 strTemp = Replace(strTemp, "&aacute;", "á")
 strTemp = Replace(strTemp, "&acirc;", "â")
 strTemp = Replace(strTemp, "&atilde;", "ã")
 strTemp = Replace(strTemp, "&auml;", "ä")
 strTemp = Replace(strTemp, "&aring;", "å")
 strTemp = Replace(strTemp, "&aelig;", "æ")
 strTemp = Replace(strTemp, "&ccedil;", "ç")
 strTemp = Replace(strTemp, "&egrave;", "è")
 strTemp = Replace(strTemp, "&eacute;", "é")
 strTemp = Replace(strTemp, "&ecirc;", "ê")
 strTemp = Replace(strTemp, "&euml;", "ë")
 strTemp = Replace(strTemp, "&igrave;", "ì")
 strTemp = Replace(strTemp, "&iacute;", "í")
 strTemp = Replace(strTemp, "&icirc;", "î")
 strTemp = Replace(strTemp, "&iuml;", "ï")
 strTemp = Replace(strTemp, "&eth;", "?")
 strTemp = Replace(strTemp, "&ntilde;", "ñ")
 strTemp = Replace(strTemp, "&ograve;", "ò")
 strTemp = Replace(strTemp, "&oacute;", "ó")
 strTemp = Replace(strTemp, "&ocirc;", "ô")
 strTemp = Replace(strTemp, "&otilde;", "õ")
 strTemp = Replace(strTemp, "&ouml;", "ö")
 strTemp = Replace(strTemp, "&divide;", "÷")
 strTemp = Replace(strTemp, "&oslash;", "ø")
 strTemp = Replace(strTemp, "&ugrave;", "ù")
 strTemp = Replace(strTemp, "&uacute;", "ú")
 strTemp = Replace(strTemp, "&ucirc;", "û")
 strTemp = Replace(strTemp, "&uuml;", "ü")
 strTemp = Replace(strTemp, "&yacute;", "?")
 strTemp = Replace(strTemp, "&thorn;", "?")
 strTemp = Replace(strTemp, "&yuml;", "ÿ")
 strTemp = Replace(strTemp, "&OElig;", "Œ")
 strTemp = Replace(strTemp, "&oelig;", "œ")
 strTemp = Replace(strTemp, "&Scaron;", "?")
 strTemp = Replace(strTemp, "&scaron;", "?")
 strTemp = Replace(strTemp, "&Yuml;", "Ÿ")
 strTemp = Replace(strTemp, "&fnof;", "ƒ")
 strTemp = Replace(strTemp, "&circ;", "ˆ")
 strTemp = Replace(strTemp, "&tilde;", "˜")
 strTemp = Replace(strTemp, "&thinsp;", "")
 strTemp = Replace(strTemp, "&zwnj;", "")
 strTemp = Replace(strTemp, "&zwj;", "")
 strTemp = Replace(strTemp, "&lrm;", "")
 strTemp = Replace(strTemp, "&rlm;", "")
 strTemp = Replace(strTemp, "&ndash;", "–")
 strTemp = Replace(strTemp, "&mdash;", "—")
 strTemp = Replace(strTemp, "&lsquo;", "‘")
 strTemp = Replace(strTemp, "&rsquo;", "’")
 strTemp = Replace(strTemp, "&sbquo;", "‚")
 strTemp = Replace(strTemp, "&ldquo;", "“")
 strTemp = Replace(strTemp, "&rdquo;", "”")
 strTemp = Replace(strTemp, "&bdquo;", "„")
 strTemp = Replace(strTemp, "&dagger;", "†")
 strTemp = Replace(strTemp, "&Dagger;", "‡")
 strTemp = Replace(strTemp, "&bull;", "•")
 strTemp = Replace(strTemp, "&hellip;", "…")
 strTemp = Replace(strTemp, "&permil;", "‰")
 strTemp = Replace(strTemp, "&lsaquo;", "‹")
 strTemp = Replace(strTemp, "&rsaquo;", "›")
 strTemp = Replace(strTemp, "&euro;", "€")
 strTemp = Replace(strTemp, "&trade;", "™")
   HTMLEntititesDecode = strTemp
 End Function

But this doesn't cover html codes such as &#8260; or apply appropriate styling as indicated by the presence of html tags such <sup> or <sub> etc.

Any suggestions would be thoroughly appreciated. Thank you.



office-vba-dev
5 |1600 characters needed characters left characters exceeded

Up to 10 attachments (including images) can be used with a maximum of 3.0 MiB each and 30.0 MiB total.

0 Answers