Advertisement
2002ASP String Manipulation #275

rtf2html-2.1

This code recieves RTF code as output by a Rich Text Box in VB or MS Word. It outputs the equivalent in HTML. It's in a somewhat BETA form in that it handles a number of but not all of the possible codes. If you encounter a code it doesn't properly convert just send it to me and I'll try to fix the function within 24 hours. I think it does a better job on uncomplicated text than MS Word's HTML conversion.

AI

Riepilogo AI: This codebase represents a historical implementation of the logic described in the metadata. Our preservation engine analyzes the structure to provide context for modern developers.

Codice sorgente
original-source
Function RTF2HTML(strRTF As String) As String
  'Version 2.1 (3/30/99)
  
  'The most current version of this function is available at
  'http://www2.bitstream.net/~bradyh/downloads/rtf2html.zip
  
  'Converts Rich Text encoded text to HTML format
  'if you find some text that this function doesn't
  'convert properly please email the text to
  '[email protected]
  Dim strHTML As String
  Dim l As Long
  Dim lTmp As Long
  Dim lRTFLen As Long
  Dim lBOS As Long         'beginning of section
  Dim lEOS As Long         'end of section
  Dim strTmp As String
  Dim strTmp2 As String
  Dim strEOS            'string to be added to end of section
  Const gHellFrozenOver = False  'always false
  Dim gSkip As Boolean       'skip to next word/command
  Dim strCodes As String      'codes for ascii to HTML char conversion
  
  strCodes = "  {00}© {a9}´ {b4}« {ab}» {bb}¡ {a1}¿{bf}À{c0}à{e0}Á{c1}"
  strCodes = strCodes & "á{e1}Â {c2}â {e2}Ã{c3}ã{e3}Ä {c4}ä {e4}Å {c5}å {e5}Æ {c6}"
  strCodes = strCodes & "æ {e6}Ç{c7}ç{e7}Ð  {d0}ð  {f0}È{c8}è{e8}É{c9}é{e9}Ê {ca}"
  strCodes = strCodes & "ê {ea}Ë {cb}ë {eb}Ì{cc}ì{ec}Í{cd}í{ed}Î {ce}î {ee}Ï {cf}"
  strCodes = strCodes & "ï {ef}Ñ{d1}ñ{f1}Ò{d2}ò{f2}Ó{d3}ó{f3}Ô {d4}ô {f4}Õ{d5}"
  strCodes = strCodes & "õ{f5}Ö {d6}ö {f6}Ø{d8}ø{f8}Ù{d9}ù{f9}Ú{da}ú{fa}Û {db}"
  strCodes = strCodes & "û {fb}Ü {dc}ü {fc}Ý{dd}ý{fd}ÿ {ff}Þ {de}þ {fe}ß {df}§ {a7}"
  strCodes = strCodes & "¶ {b6}µ {b5}¦{a6}±{b1}·{b7}¨  {a8}¸ {b8}ª {aa}º {ba}¬  {ac}"
  strCodes = strCodes & "­  {ad}¯ {af}°  {b0}¹ {b9}² {b2}³ {b3}¼{bc}½{bd}¾{be}× {d7}"
  strCodes = strCodes & "÷{f7}¢ {a2}£ {a3}¤{a4}¥  {a5}"
  strHTML = ""
  lRTFLen = Len(strRTF)
  'seek first line with text on it
  lBOS = InStr(strRTF, vbCrLf & "\deflang")
  If lBOS = 0 Then GoTo finally Else lBOS = lBOS + 2
  lEOS = InStr(lBOS, strRTF, vbCrLf & "\par")
  If lEOS = 0 Then GoTo finally
  While Not gHellFrozenOver
    strTmp = Mid(strRTF, lBOS, lEOS - lBOS)
    l = lBOS
    While l <= lEOS
      strTmp = Mid(strRTF, l, 1)
      Select Case strTmp
      Case "{"
        l = l + 1
      Case "}"
        strHTML = strHTML & strEOS
        l = l + 1
      Case "\"  'special code
        l = l + 1
        strTmp = Mid(strRTF, l, 1)
        Select Case strTmp
        Case "b"
          If ((Mid(strRTF, l + 1, 1) = " ") Or (Mid(strRTF, l + 1, 1) = "\")) Then
            strHTML = strHTML & "<B>"
            strEOS = "</B>" & strEOS
            If (Mid(strRTF, l + 1, 1) = " ") Then l = l + 1
          ElseIf (Mid(strRTF, l, 7) = "bullet ") Then
            strHTML = strHTML & "•"  'bullet
            l = l + 6
          Else
            gSkip = True
          End If
        Case "e"
          If (Mid(strRTF, l, 7) = "emdash ") Then
            strHTML = strHTML & "—"
            l = l + 6
          Else
            gSkip = True
          End If
        Case "i"
          If ((Mid(strRTF, l + 1, 1) = " ") Or (Mid(strRTF, l + 1, 1) = "\")) Then
            strHTML = strHTML & "<I>"
            strEOS = "</I>" & strEOS
            If (Mid(strRTF, l + 1, 1) = " ") Then l = l + 1
          Else
            gSkip = True
          End If
        Case "l"
          If (Mid(strRTF, l, 10) = "ldblquote ") Then
            strHTML = strHTML & "“"
            l = l + 9
          ElseIf (Mid(strRTF, l, 7) = "lquote ") Then
            strHTML = strHTML & "‘"
            l = l + 6
          Else
            gSkip = True
          End If
        Case "p"
          If ((Mid(strRTF, l, 6) = "plain\") Or (Mid(strRTF, l, 6) = "plain ")) Then
            strHTML = strHTML & strEOS
            strEOS = ""
            If Mid(strRTF, l + 5, 1) = "\" Then l = l + 4 Else l = l + 5  'catch next \ but skip a space
          Else
            gSkip = True
          End If
        Case "r"
          If (Mid(strRTF, l, 7) = "rquote ") Then
            strHTML = strHTML & "’"
            l = l + 6
          ElseIf (Mid(strRTF, l, 10) = "rdblquote ") Then
            strHTML = strHTML & "”"
            l = l + 9
          Else
            gSkip = True
          End If
        Case "t"
          If (Mid(strRTF, l, 4) = "tab ") Then
            strHTML = strHTML & Chr$(9)  'tab
            l = l + 3
          Else
            gSkip = True
          End If
        Case "'"
          strTmp2 = "{" & Mid(strRTF, l + 1, 2) & "}"
          lTmp = InStr(strCodes, strTmp2)
          If lTmp = 0 Then
            strHTML = strHTML & Chr("&H" & Mid(strTmp2, 2, 2))
          Else
            strHTML = strHTML & Trim(Mid(strCodes, lTmp - 8, 8))
          End If
          l = l + 2
        Case "~"
          strHTML = strHTML & " "
        Case "{", "}", "\"
          strHTML = strHTML & strTmp
        Case vbLf, vbCr, vbCrLf  'always use vbCrLf
          strHTML = strHTML & vbCrLf
        Case Else
          gSkip = True
        End Select
        If gSkip = True Then
          'skip everything up until the next space or "\"
          While ((Mid(strRTF, l, 1) <> " ") And (Mid(strRTF, l, 1) <> "\"))
            l = l + 1
          Wend
          gSkip = False
          If (Mid(strRTF, l, 1) = "\") Then l = l - 1
        End If
        l = l + 1
      Case vbLf, vbCr, vbCrLf
        l = l + 1
      Case Else
        strHTML = strHTML & strTmp
        l = l + 1
      End Select
    Wend
        
    lBOS = lEOS + 2
    lEOS = InStr(lEOS + 1, strRTF, vbCrLf & "\par")
    If lEOS = 0 Then GoTo finally
    
    strHTML = strHTML & "<br>"
  Wend
  
finally:
  RTF2HTML = strHTML
End Function
Commenti originali (3)
Recuperato da Wayback Machine