///////////////////////////////////////////////////////////////////////////////
// Name: src/common/markupparser.cpp
// Purpose: Implementation of wxMarkupParser.
// Author: Vadim Zeitlin
// Created: 2011-02-16
// Copyright: (c) 2011 Vadim Zeitlin <vadim@wxwidgets.org>
// Licence: wxWindows licence
///////////////////////////////////////////////////////////////////////////////
// ============================================================================
// declarations
// ============================================================================
// ----------------------------------------------------------------------------
// headers
// ----------------------------------------------------------------------------
// for compilers that support precompilation, includes "wx.h".
#include "wx/wxprec.h"
#ifdef __BORLANDC__
#pragma hdrstop
#endif
#if wxUSE_MARKUP
#ifndef WX_PRECOMP
#include "wx/log.h"
#endif
#include "wx/private/markupparser.h"
#include "wx/stack.h"
namespace
{
// ----------------------------------------------------------------------------
// constants
// ----------------------------------------------------------------------------
// Array containing the predefined XML 1.0 entities.
const struct XMLEntity
{
const char *name;
int len; // == strlen(name)
char value;
} xmlEntities[] =
{
{ "lt", 2, '<' },
{ "gt", 2, '>' },
{ "amp", 3, '&' },
{ "apos", 4, '\''},
{ "quot", 4, '"' },
};
// ----------------------------------------------------------------------------
// helper functions
// ----------------------------------------------------------------------------
wxString
ExtractUntil(char ch, wxString::const_iterator& it, wxString::const_iterator end)
{
wxString str;
for ( ; it != end; ++it )
{
if ( *it == ch )
return str;
str += *it;
}
// Return empty string to indicate that we didn't find ch at all.
return wxString();
}
} // anonymous namespace
// ============================================================================
// wxMarkupParser implementation
// ============================================================================
wxString
wxMarkupParser::ParseAttrs(wxString attrs, TagAndAttrs& tagAndAttrs)
{
if ( tagAndAttrs.name.CmpNoCase("span") != 0 && !attrs.empty() )
{
return wxString::Format("tag \"%s\" can't have attributes",
tagAndAttrs.name);
}
// TODO: Parse more attributes described at
// http://library.gnome.org/devel/pango/stable/PangoMarkupFormat.html
// and at least ignore them gracefully instead of giving errors (but
// quite a few of them could be supported as well, notable font_desc).
wxMarkupSpanAttributes& spanAttrs = tagAndAttrs.attrs;
while ( !attrs.empty() )
{
wxString rest;
const wxString attr = attrs.BeforeFirst(' ', &rest);
attrs = rest;
// The "original" versions are used for error messages only.
wxString valueOrig;
const wxString nameOrig = attr.BeforeFirst('=', &valueOrig);
const wxString name = nameOrig.Lower();
wxString value = valueOrig.Lower();
// All attributes values must be quoted.
if ( value.length() < 2 ||
(value[0] != value.Last()) ||
(value[0] != '"' && value[0] != '\'') )
{
return wxString::Format("bad quoting for value of \"%s\"",
nameOrig);
}
value.assign(value, 1, value.length() - 2);
if ( name == "foreground" || name == "fgcolor" || name == "color" )
{
spanAttrs.m_fgCol = value;
}
else if ( name == "background" || name == "bgcolor" )
{
spanAttrs.m_bgCol = value;
}
else if ( name == "font_family" || name == "face" )
{
spanAttrs.m_fontFace = value;
}
else if ( name == "font_weight" || name == "weight" )
{
unsigned long weight;
if ( value == "ultralight" || value == "light" || value == "normal" )
spanAttrs.m_isBold = wxMarkupSpanAttributes::No;
else if ( value == "bold" || value == "ultrabold" || value == "heavy" )
spanAttrs.m_isBold = wxMarkupSpanAttributes::Yes;
else if ( value.ToULong(&weight) )
spanAttrs.m_isBold = weight >= 600 ? wxMarkupSpanAttributes::Yes
: wxMarkupSpanAttributes::No;
else
return wxString::Format("invalid font weight \"%s\"", valueOrig);
}
else if ( name == "font_style" || name == "style" )
{
if ( value == "normal" )
spanAttrs.m_isItalic = wxMarkupSpanAttributes::No;
else if ( value == "oblique" || value == "italic" )
spanAttrs.m_isItalic = wxMarkupSpanAttributes::Yes;
else
return wxString::Format("invalid font style \"%s\"", valueOrig);
}
else if ( name == "size" )
{
unsigned long size;
if ( value.ToULong(&size) )
{
spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_PointParts;
spanAttrs.m_fontSize = size;
}
else if ( value == "smaller" || value == "larger" )
{
spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Relative;
spanAttrs.m_fontSize = value == "smaller" ? -1 : +1;
}
else // Must be a CSS-like size specification
{
int cssSize = 1;
if ( value.StartsWith("xx-", &rest) )
cssSize = 3;
else if ( value.StartsWith("x-", &rest) )
cssSize = 2;
else if ( value == "medium" )
cssSize = 0;
else
rest = value;
if ( cssSize != 0 )
{
if ( rest == "small" )
cssSize = -cssSize;
else if ( rest != "large" )
return wxString::Format("invalid font size \"%s\"",
valueOrig);
}
spanAttrs.m_sizeKind = wxMarkupSpanAttributes::Size_Symbolic;
spanAttrs.m_fontSize = cssSize;
}
}
}
return wxString();
}
bool wxMarkupParser::OutputTag(const TagAndAttrs& tagAndAttrs, bool start)
{
if ( tagAndAttrs.name.CmpNoCase("span") == 0 )
{
if ( start )
m_output.OnSpanStart(tagAndAttrs.attrs);
else
m_output.OnSpanEnd(tagAndAttrs.attrs);
return true;
}
else // non-span tag
{
static const struct TagHandler
{
const char *name;
void (wxMarkupParserOutput::*startFunc)();
void (wxMarkupParserOutput::*endFunc)();
} tagHandlers[] =
{
{ "b", &wxMarkupParserOutput::OnBoldStart,
&wxMarkupParserOutput::OnBoldEnd },
{ "i", &wxMarkupParserOutput::OnItalicStart,
&wxMarkupParserOutput::OnItalicEnd },
{ "u", &wxMarkupParserOutput::OnUnderlinedStart,
&wxMarkupParserOutput::OnUnderlinedEnd },
{ "s", &wxMarkupParserOutput::OnStrikethroughStart,
&wxMarkupParserOutput::OnStrikethroughEnd },
{ "big", &wxMarkupParserOutput::OnBigStart,
&wxMarkupParserOutput::OnBigEnd },
{ "small", &wxMarkupParserOutput::OnSmallStart,
&wxMarkupParserOutput::OnSmallEnd },
{ "tt", &wxMarkupParserOutput::OnTeletypeStart,
&wxMarkupParserOutput::OnTeletypeEnd },
};
for ( unsigned n = 0; n < WXSIZEOF(tagHandlers); n++ )
{
const TagHandler& h = tagHandlers[n];
if ( tagAndAttrs.name.CmpNoCase(h.name) == 0 )
{
if ( start )
(m_output.*(h.startFunc))();
else
(m_output.*(h.endFunc))();
return true;
}
}
}
// Unknown tag name.
return false;
}
bool wxMarkupParser::Parse(const wxString& text)
{
// The stack containing the names and corresponding attributes (which are
// actually only used for <span> tags) of all of the currently opened tag
// or none if we're not inside any tag.
wxStack<TagAndAttrs> tags;
// Current run of text.
wxString current;
const wxString::const_iterator end = text.end();
for ( wxString::const_iterator it = text.begin(); it != end; ++it )
{
switch ( (*it).GetValue() )
{
case '<':
{
// Flush the text preceding the tag, if any.
if ( !current.empty() )
{
m_output.OnText(current);
current.clear();
}
// This variable is used only in the debugging messages
// and doesn't need to be defined if they're not compiled
// at all (it actually would result in unused variable
// messages in this case).
#if wxUSE_LOG_DEBUG || !defined(HAVE_VARIADIC_MACROS)
// Remember the tag starting position for the error
// messages.
const size_t pos = it - text.begin();
#endif
bool start = true;
if ( ++it != end && *it == '/' )
{
start = false;
++it;
}
const wxString tag = ExtractUntil('>', it, end);
if ( tag.empty() )
{
wxLogDebug("%s at %lu.",
it == end ? "Unclosed tag starting"
: "Empty tag",
pos);
return false;
}
if ( start )
{
wxString attrs;
const wxString name = tag.BeforeFirst(' ', &attrs);
TagAndAttrs tagAndAttrs(name);
const wxString err = ParseAttrs(attrs, tagAndAttrs);
if ( !err.empty() )
{
wxLogDebug("Bad attributes for \"%s\" "
"at %lu: %s.",
name, pos, err);
return false;
}
tags.push(tagAndAttrs);
}
else // end tag
{
if ( tags.empty() || tags.top().name != tag )
{
wxLogDebug("Unmatched closing tag \"%s\" at %lu.",
tag, pos);
return false;
}
}
if ( !OutputTag(tags.top(), start) )
{
wxLogDebug("Unknown tag at %lu.", pos);
return false;
}
if ( !start )
tags.pop();
}
break;
case '>':
wxLogDebug("'>' should be escaped as \">\"; at %lu.",
it - text.begin());
break;
case '&':
// Processing is somewhat complicated: we need to recognize at
// least the "<" entity to allow escaping left square
// brackets in the markup and, in fact, we recognize all of the
// standard XML entities for consistency with Pango markup
// parsing.
//
// However we also allow '&' to appear unescaped, i.e. directly
// and not as "&" when it is used to introduce the mnemonic
// for the label. In this case we simply leave it alone.
//
// Notice that this logic makes it impossible to have a label
// with "lt;" inside it and using "l" as mnemonic but hopefully
// this shouldn't be a problem in practice.
{
const size_t pos = it - text.begin() + 1;
unsigned n;
for ( n = 0; n < WXSIZEOF(xmlEntities); n++ )
{
const XMLEntity& xmlEnt = xmlEntities[n];
if ( text.compare(pos, xmlEnt.len, xmlEnt.name) == 0
&& text[pos + xmlEnt.len] == ';' )
{
current += xmlEnt.value;
it += xmlEnt.len + 1; // +1 for '&' itself
break;
}
}
if ( n < WXSIZEOF(xmlEntities) )
break;
wxFALLTHROUGH;//else: fall through, '&' is not special
}
default:
current += *it;
}
}
if ( !tags.empty() )
{
wxLogDebug("Missing closing tag for \"%s\"", tags.top().name);
return false;
}
if ( !current.empty() )
m_output.OnText(current);
return true;
}
/* static */
wxString wxMarkupParser::Quote(const wxString& text)
{
wxString quoted;
quoted.reserve(text.length());
for ( wxString::const_iterator it = text.begin(); it != text.end(); ++it )
{
unsigned n;
for ( n = 0; n < WXSIZEOF(xmlEntities); n++ )
{
const XMLEntity& xmlEnt = xmlEntities[n];
if ( *it == xmlEnt.value )
{
quoted << '&' << xmlEnt.name << ';';
break;
}
}
if ( n == WXSIZEOF(xmlEntities) )
quoted += *it;
}
return quoted;
}
/* static */
wxString wxMarkupParser::Strip(const wxString& text)
{
class StripOutput : public wxMarkupParserOutput
{
public:
StripOutput() { }
const wxString& GetText() const { return m_text; }
virtual void OnText(const wxString& string) wxOVERRIDE { m_text += string; }
virtual void OnBoldStart() wxOVERRIDE { }
virtual void OnBoldEnd() wxOVERRIDE { }
virtual void OnItalicStart() wxOVERRIDE { }
virtual void OnItalicEnd() wxOVERRIDE { }
virtual void OnUnderlinedStart() wxOVERRIDE { }
virtual void OnUnderlinedEnd() wxOVERRIDE { }
virtual void OnStrikethroughStart() wxOVERRIDE { }
virtual void OnStrikethroughEnd() wxOVERRIDE { }
virtual void OnBigStart() wxOVERRIDE { }
virtual void OnBigEnd() wxOVERRIDE { }
virtual void OnSmallStart() wxOVERRIDE { }
virtual void OnSmallEnd() wxOVERRIDE { }
virtual void OnTeletypeStart() wxOVERRIDE { }
virtual void OnTeletypeEnd() wxOVERRIDE { }
virtual void OnSpanStart(const wxMarkupSpanAttributes& WXUNUSED(a)) wxOVERRIDE { }
virtual void OnSpanEnd(const wxMarkupSpanAttributes& WXUNUSED(a)) wxOVERRIDE { }
private:
wxString m_text;
};
StripOutput output;
wxMarkupParser parser(output);
if ( !parser.Parse(text) )
return wxString();
return output.GetText();
}
#endif // wxUSE_MARKUP
↑ V813 Decreased performance. The 'end' argument should probably be rendered as a constant reference.