1 files changed, 230 insertions, 116 deletions
diff --git a/bitbake/lib/bs4/element.py b/bitbake/lib/bs4/element.py
index da9afdf48e..0e62c2e100 100644
--- a/bitbake/lib/bs4/element.py
+++ b/bitbake/lib/bs4/element.py
@@ -1,3 +1,6 @@
+__license__ = "MIT"
+from pdb import set_trace
 import collections
 import re
 import sys
@@ -21,22 +24,22 @@ def _alias(attr):
    return alias
-class NamespacedAttribute(unicode):
+class NamespacedAttribute(str):
    def __new__(cls, prefix, name, namespace=None):
        if name is None:
-            obj = unicode.__new__(cls, prefix)
+            obj = str.__new__(cls, prefix)
        elif prefix is None:
            # Not really namespaced.
-            obj = unicode.__new__(cls, name)
+            obj = str.__new__(cls, name)
        else:
-            obj = unicode.__new__(cls, prefix + ":" + name)
+            obj = str.__new__(cls, prefix + ":" + name)
        obj.prefix = prefix
        obj.name = name
        obj.namespace = namespace
        return obj
-class AttributeValueWithCharsetSubstitution(unicode):
+class AttributeValueWithCharsetSubstitution(str):
    """A stand-in object for a character encoding specified in HTML."""
 class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
@@ -47,7 +50,7 @@ class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
    """
    def __new__(cls, original_value):
-        obj = unicode.__new__(cls, original_value)
+        obj = str.__new__(cls, original_value)
        obj.original_value = original_value
        return obj
@@ -70,9 +73,9 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
        match = cls.CHARSET_RE.search(original_value)
        if match is None:
            # No substitution necessary.
-            return unicode.__new__(unicode, original_value)
+            return str.__new__(str, original_value)
-        obj = unicode.__new__(cls, original_value)
+        obj = str.__new__(cls, original_value)
        obj.original_value = original_value
        return obj
@@ -152,7 +155,7 @@ class PageElement(object):
    def format_string(self, s, formatter='minimal'):
        """Format the given string using the given formatter."""
-        if not callable(formatter):
+        if not isinstance(formatter, collections.Callable):
            formatter = self._formatter_for_name(formatter)
        if formatter is None:
            output = s
@@ -185,24 +188,40 @@ class PageElement(object):
            return self.HTML_FORMATTERS.get(
                name, HTMLAwareEntitySubstitution.substitute_xml)
-    def setup(self, parent=None, previous_element=None):
+    def setup(self, parent=None, previous_element=None, next_element=None,
+              previous_sibling=None, next_sibling=None):
        """Sets up the initial relations between this element and
        other elements."""
        self.parent = parent
        self.previous_element = previous_element
        if previous_element is not None:
            self.previous_element.next_element = self
-        self.next_element = None
-        self.previous_sibling = None
+        self.next_element = next_element
-        self.next_sibling = None
+        if self.next_element:
-        if self.parent is not None and self.parent.contents:
+            self.next_element.previous_element = self
-            self.previous_sibling = self.parent.contents[-1]
+        self.next_sibling = next_sibling
+        if self.next_sibling:
+            self.next_sibling.previous_sibling = self
+        if (not previous_sibling
+            and self.parent is not None and self.parent.contents):
+            previous_sibling = self.parent.contents[-1]
+        self.previous_sibling = previous_sibling
+        if previous_sibling:
            self.previous_sibling.next_sibling = self
    nextSibling = _alias("next_sibling")  # BS3
    previousSibling = _alias("previous_sibling")  # BS3
    def replace_with(self, replace_with):
+        if not self.parent:
+            raise ValueError(
+                "Cannot replace one element with another when the"
+                "element to be replaced is not part of a tree.")
        if replace_with is self:
            return
        if replace_with is self.parent:
@@ -216,6 +235,10 @@ class PageElement(object):
    def unwrap(self):
        my_parent = self.parent
+        if not self.parent:
+            raise ValueError(
+                "Cannot replace an element with its contents when that"
+                "element is not part of a tree.")
        my_index = self.parent.index(self)
        self.extract()
        for child in reversed(self.contents[:]):
@@ -240,17 +263,20 @@ class PageElement(object):
        last_child = self._last_descendant()
        next_element = last_child.next_element
-        if self.previous_element is not None:
+        if (self.previous_element is not None and
+            self.previous_element is not next_element):
            self.previous_element.next_element = next_element
-        if next_element is not None:
+        if next_element is not None and next_element is not self.previous_element:
            next_element.previous_element = self.previous_element
        self.previous_element = None
        last_child.next_element = None
        self.parent = None
-        if self.previous_sibling is not None:
+        if (self.previous_sibling is not None
+            and self.previous_sibling is not self.next_sibling):
            self.previous_sibling.next_sibling = self.next_sibling
-        if self.next_sibling is not None:
+        if (self.next_sibling is not None
+            and self.next_sibling is not self.previous_sibling):
            self.next_sibling.previous_sibling = self.previous_sibling
        self.previous_sibling = self.next_sibling = None
        return self
@@ -263,16 +289,18 @@ class PageElement(object):
            last_child = self
            while isinstance(last_child, Tag) and last_child.contents:
                last_child = last_child.contents[-1]
-        if not accept_self and last_child == self:
+        if not accept_self and last_child is self:
            last_child = None
        return last_child
    # BS3: Not part of the API!
    _lastRecursiveChild = _last_descendant
    def insert(self, position, new_child):
+        if new_child is None:
+            raise ValueError("Cannot insert None into a tag.")
        if new_child is self:
            raise ValueError("Cannot insert a tag into itself.")
-        if (isinstance(new_child, basestring)
+        if (isinstance(new_child, str)
            and not isinstance(new_child, NavigableString)):
            new_child = NavigableString(new_child)
@@ -478,6 +506,10 @@ class PageElement(object):
    def _find_all(self, name, attrs, text, limit, generator, **kwargs):
        "Iterates over a generator looking for things that match."
+        if text is None and 'string' in kwargs:
+            text = kwargs['string']
+            del kwargs['string']
        if isinstance(name, SoupStrainer):
            strainer = name
        else:
@@ -489,7 +521,7 @@ class PageElement(object):
                result = (element for element in generator
                          if isinstance(element, Tag))
                return ResultSet(strainer, result)
-            elif isinstance(name, basestring):
+            elif isinstance(name, str):
                # Optimization to find all tags with a given name.
                result = (element for element in generator
                          if isinstance(element, Tag)
@@ -548,17 +580,17 @@ class PageElement(object):
    # Methods for supporting CSS selectors.
-    tag_name_re = re.compile('^[a-z0-9]+$')
+    tag_name_re = re.compile('^[a-zA-Z0-9][-.a-zA-Z0-9:_]*$')
-    # /^(\w+)\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/
+    # /^([a-zA-Z0-9][-.a-zA-Z0-9:_]*)\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/
-    #   \---/  \---/\-------------/    \-------/
+    #   \---------------------------/  \---/\-------------/    \-------/
-    #     |      |         |               |
+    #     |                              |         |               |
-    #     |      |         |           The value
+    #     |                              |         |           The value
-    #     |      |    ~,|,^,$,* or =
+    #     |                              |    ~,|,^,$,* or =
-    #     |   Attribute
+    #     |                           Attribute
    #    Tag
    attribselect_re = re.compile(
-        r'^(?P<tag>\w+)?\[(?P<attribute>\w+)(?P<operator>[=~\|\^\$\*]?)' +
+        r'^(?P<tag>[a-zA-Z0-9][-.a-zA-Z0-9:_]*)?\[(?P<attribute>[\w-]+)(?P<operator>[=~\|\^\$\*]?)' +
        r'=?"?(?P<value>[^\]"]*)"?\]$'
        )
@@ -640,7 +672,7 @@ class PageElement(object):
        return self.parents
-class NavigableString(unicode, PageElement):
+class NavigableString(str, PageElement):
    PREFIX = ''
    SUFFIX = ''
@@ -653,15 +685,21 @@ class NavigableString(unicode, PageElement):
        passed in to the superclass's __new__ or the superclass won't know
        how to handle non-ASCII characters.
        """
-        if isinstance(value, unicode):
+        if isinstance(value, str):
-            return unicode.__new__(cls, value)
+            u = str.__new__(cls, value)
-        return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
+        else:
+            u = str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
+        u.setup()
+        return u
    def __copy__(self):
-        return self
+        """A copy of a NavigableString has the same contents and class
+        as the original, but it is not connected to the parse tree.
+        """
+        return type(self)(self)
    def __getnewargs__(self):
-        return (unicode(self),)
+        return (str(self),)
    def __getattr__(self, attr):
        """text.string gives you text. This is for backwards
@@ -701,23 +739,23 @@ class PreformattedString(NavigableString):
 class CData(PreformattedString):
-    PREFIX = u'<![CDATA['
+    PREFIX = '<![CDATA['
-    SUFFIX = u']]>'
+    SUFFIX = ']]>'
 class ProcessingInstruction(PreformattedString):
-    PREFIX = u'<?'
+    PREFIX = '<?'
-    SUFFIX = u'?>'
+    SUFFIX = '>'
 class Comment(PreformattedString):
-    PREFIX = u'<!--'
+    PREFIX = '<!--'
-    SUFFIX = u'-->'
+    SUFFIX = '-->'
 class Declaration(PreformattedString):
-    PREFIX = u'<!'
+    PREFIX = '<?'
-    SUFFIX = u'!>'
+    SUFFIX = '?>'
 class Doctype(PreformattedString):
@@ -734,8 +772,8 @@ class Doctype(PreformattedString):
        return Doctype(value)
-    PREFIX = u'<!DOCTYPE '
+    PREFIX = '<!DOCTYPE '
-    SUFFIX = u'>\n'
+    SUFFIX = '>\n'
 class Tag(PageElement):
@@ -759,9 +797,12 @@ class Tag(PageElement):
        self.prefix = prefix
        if attrs is None:
            attrs = {}
-        elif attrs and builder.cdata_list_attributes:
+        elif attrs:
-            attrs = builder._replace_cdata_list_attribute_values(
+            if builder is not None and builder.cdata_list_attributes:
-                self.name, attrs)
+                attrs = builder._replace_cdata_list_attribute_values(
+                    self.name, attrs)
+            else:
+                attrs = dict(attrs)
        else:
            attrs = dict(attrs)
        self.attrs = attrs
@@ -778,6 +819,18 @@ class Tag(PageElement):
    parserClass = _alias("parser_class")  # BS3
+    def __copy__(self):
+        """A copy of a Tag is a new Tag, unconnected to the parse tree.
+        Its contents are a copy of the old Tag's contents.
+        """
+        clone = type(self)(None, self.builder, self.name, self.namespace,
+                           self.nsprefix, self.attrs)
+        for attr in ('can_be_empty_element', 'hidden'):
+            setattr(clone, attr, getattr(self, attr))
+        for child in self.contents:
+            clone.append(child.__copy__())
+        return clone
    @property
    def is_empty_element(self):
        """Is this tag an empty-element tag? (aka a self-closing tag)
@@ -843,7 +896,7 @@ class Tag(PageElement):
        for string in self._all_strings(True):
            yield string
-    def get_text(self, separator=u"", strip=False,
+    def get_text(self, separator="", strip=False,
                 types=(NavigableString, CData)):
        """
        Get all child strings, concatenated using the given separator.
@@ -915,7 +968,7 @@ class Tag(PageElement):
    def __contains__(self, x):
        return x in self.contents
-    def __nonzero__(self):
+    def __bool__(self):
        "A tag is non-None even if it has no contents."
        return True
@@ -971,15 +1024,25 @@ class Tag(PageElement):
        as defined in __eq__."""
        return not self == other
-    def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+    def __repr__(self, encoding="unicode-escape"):
        """Renders this tag as a string."""
-        return self.encode(encoding)
+        if PY3K:
+            # "The return value must be a string object", i.e. Unicode
+            return self.decode()
+        else:
+            # "The return value must be a string object", i.e. a bytestring.
+            # By convention, the return value of __repr__ should also be
+            # an ASCII string.
+            return self.encode(encoding)
    def __unicode__(self):
        return self.decode()
    def __str__(self):
-        return self.encode()
+        if PY3K:
+            return self.decode()
+        else:
+            return self.encode()
    if PY3K:
        __str__ = __repr__ = __unicode__
@@ -1014,7 +1077,7 @@ class Tag(PageElement):
        # First off, turn a string formatter into a function. This
        # will stop the lookup from happening over and over again.
-        if not callable(formatter):
+        if not isinstance(formatter, collections.Callable):
            formatter = self._formatter_for_name(formatter)
        attrs = []
@@ -1025,8 +1088,8 @@ class Tag(PageElement):
                else:
                    if isinstance(val, list) or isinstance(val, tuple):
                        val = ' '.join(val)
-                    elif not isinstance(val, basestring):
+                    elif not isinstance(val, str):
-                        val = unicode(val)
+                        val = str(val)
                    elif (
                        isinstance(val, AttributeValueWithCharsetSubstitution)
                        and eventual_encoding is not None):
@@ -1034,7 +1097,7 @@ class Tag(PageElement):
                    text = self.format_string(val, formatter)
                    decoded = (
-                        unicode(key) + '='
+                        str(key) + '='
                        + EntitySubstitution.quoted_attribute_value(text))
                attrs.append(decoded)
        close = ''
@@ -1103,16 +1166,22 @@ class Tag(PageElement):
                       formatter="minimal"):
        """Renders the contents of this tag as a Unicode string.
+        :param indent_level: Each line of the rendering will be
+           indented this many spaces.
        :param eventual_encoding: The tag is destined to be
           encoded into this encoding. This method is _not_
           responsible for performing that encoding. This information
           is passed in so that it can be substituted in if the
           document contains a <META> tag that mentions the document's
           encoding.
+        :param formatter: The output formatter responsible for converting
+           entities to Unicode characters.
        """
        # First off, turn a string formatter into a function. This
        # will stop the lookup from happening over and over again.
-        if not callable(formatter):
+        if not isinstance(formatter, collections.Callable):
            formatter = self._formatter_for_name(formatter)
        pretty_print = (indent_level is not None)
@@ -1137,7 +1206,17 @@ class Tag(PageElement):
    def encode_contents(
        self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING,
        formatter="minimal"):
-        """Renders the contents of this tag as a bytestring."""
+        """Renders the contents of this tag as a bytestring.
+        :param indent_level: Each line of the rendering will be
+           indented this many spaces.
+        :param eventual_encoding: The bytestring will be in this encoding.
+        :param formatter: The output formatter responsible for converting
+           entities to Unicode characters.
+        """
        contents = self.decode_contents(indent_level, encoding, formatter)
        return contents.encode(encoding)
@@ -1201,26 +1280,57 @@ class Tag(PageElement):
    _selector_combinators = ['>', '+', '~']
    _select_debug = False
-    def select(self, selector, _candidate_generator=None):
+    def select_one(self, selector):
+        """Perform a CSS selection operation on the current element."""
+        value = self.select(selector, limit=1)
+        if value:
+            return value[0]
+        return None
+    def select(self, selector, _candidate_generator=None, limit=None):
        """Perform a CSS selection operation on the current element."""
+        # Handle grouping selectors if ',' exists, ie: p,a
+        if ',' in selector:
+            context = []
+            for partial_selector in selector.split(','):
+                partial_selector = partial_selector.strip()
+                if partial_selector == '':
+                    raise ValueError('Invalid group selection syntax: %s' % selector)
+                candidates = self.select(partial_selector, limit=limit)
+                for candidate in candidates:
+                    if candidate not in context:
+                        context.append(candidate)
+                if limit and len(context) >= limit:
+                    break
+            return context
        tokens = selector.split()
        current_context = [self]
        if tokens[-1] in self._selector_combinators:
            raise ValueError(
                'Final combinator "%s" is missing an argument.' % tokens[-1])
        if self._select_debug:
-            print 'Running CSS selector "%s"' % selector
+            print('Running CSS selector "%s"' % selector)
        for index, token in enumerate(tokens):
-            if self._select_debug:
+            new_context = []
-                print ' Considering token "%s"' % token
+            new_context_ids = set([])
-            recursive_candidate_generator = None
-            tag_name = None
            if tokens[index-1] in self._selector_combinators:
                # This token was consumed by the previous combinator. Skip it.
                if self._select_debug:
-                    print '  Token was consumed by the previous combinator.'
+                    print('  Token was consumed by the previous combinator.')
                continue
+            if self._select_debug:
+                print(' Considering token "%s"' % token)
+            recursive_candidate_generator = None
+            tag_name = None
            # Each operation corresponds to a checker function, a rule
            # for determining whether a candidate matches the
            # selector. Candidates are generated by the active
@@ -1256,35 +1366,38 @@ class Tag(PageElement):
                        "A pseudo-class must be prefixed with a tag name.")
                pseudo_attributes = re.match('([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo)
                found = []
-                if pseudo_attributes is not None:
+                if pseudo_attributes is None:
+                    pseudo_type = pseudo
+                    pseudo_value = None
+                else:
                    pseudo_type, pseudo_value = pseudo_attributes.groups()
-                    if pseudo_type == 'nth-of-type':
+                if pseudo_type == 'nth-of-type':
-                        try:
+                    try:
-                            pseudo_value = int(pseudo_value)
+                        pseudo_value = int(pseudo_value)
-                        except:
+                    except:
-                            raise NotImplementedError(
-                                'Only numeric values are currently supported for the nth-of-type pseudo-class.')
-                        if pseudo_value < 1:
-                            raise ValueError(
-                                'nth-of-type pseudo-class value must be at least 1.')
-                        class Counter(object):
-                            def __init__(self, destination):
-                                self.count = 0
-                                self.destination = destination
-                            def nth_child_of_type(self, tag):
-                                self.count += 1
-                                if self.count == self.destination:
-                                    return True
-                                if self.count > self.destination:
-                                    # Stop the generator that's sending us
-                                    # these things.
-                                    raise StopIteration()
-                                return False
-                        checker = Counter(pseudo_value).nth_child_of_type
-                    else:
                        raise NotImplementedError(
-                            'Only the following pseudo-classes are implemented: nth-of-type.')
+                            'Only numeric values are currently supported for the nth-of-type pseudo-class.')
+                    if pseudo_value < 1:
+                        raise ValueError(
+                            'nth-of-type pseudo-class value must be at least 1.')
+                    class Counter(object):
+                        def __init__(self, destination):
+                            self.count = 0
+                            self.destination = destination
+                        def nth_child_of_type(self, tag):
+                            self.count += 1
+                            if self.count == self.destination:
+                                return True
+                            if self.count > self.destination:
+                                # Stop the generator that's sending us
+                                # these things.
+                                raise StopIteration()
+                            return False
+                    checker = Counter(pseudo_value).nth_child_of_type
+                else:
+                    raise NotImplementedError(
+                        'Only the following pseudo-classes are implemented: nth-of-type.')
            elif token == '*':
                # Star selector -- matches everything
@@ -1311,7 +1424,6 @@ class Tag(PageElement):
            else:
                raise ValueError(
                    'Unsupported or invalid CSS selector: "%s"' % token)
            if recursive_candidate_generator:
                # This happens when the selector looks like  "> foo".
                #
@@ -1325,14 +1437,14 @@ class Tag(PageElement):
                next_token = tokens[index+1]
                def recursive_select(tag):
                    if self._select_debug:
-                        print '    Calling select("%s") recursively on %s %s' % (next_token, tag.name, tag.attrs)
+                        print('    Calling select("%s") recursively on %s %s' % (next_token, tag.name, tag.attrs))
-                        print '-' * 40
+                        print('-' * 40)
                    for i in tag.select(next_token, recursive_candidate_generator):
                        if self._select_debug:
-                            print '(Recursive select picked up candidate %s %s)' % (i.name, i.attrs)
+                            print('(Recursive select picked up candidate %s %s)' % (i.name, i.attrs))
                        yield i
                    if self._select_debug:
-                        print '-' * 40
+                        print('-' * 40)
                _use_candidate_generator = recursive_select
            elif _candidate_generator is None:
                # By default, a tag's candidates are all of its
@@ -1343,7 +1455,7 @@ class Tag(PageElement):
                        check = "[any]"
                    else:
                        check = tag_name
-                    print '   Default candidate generator, tag name="%s"' % check
+                    print('   Default candidate generator, tag name="%s"' % check)
                if self._select_debug:
                    # This is redundant with later code, but it stops
                    # a bunch of bogus tags from cluttering up the
@@ -1361,12 +1473,11 @@ class Tag(PageElement):
            else:
                _use_candidate_generator = _candidate_generator
-            new_context = []
+            count = 0
-            new_context_ids = set([])
            for tag in current_context:
                if self._select_debug:
-                    print "    Running candidate generator on %s %s" % (
+                    print("    Running candidate generator on %s %s" % (
-                        tag.name, repr(tag.attrs))
+                        tag.name, repr(tag.attrs)))
                for candidate in _use_candidate_generator(tag):
                    if not isinstance(candidate, Tag):
                        continue
@@ -1381,21 +1492,24 @@ class Tag(PageElement):
                            break
                    if checker is None or result:
                        if self._select_debug:
-                            print "     SUCCESS %s %s" % (candidate.name, repr(candidate.attrs))
+                            print("     SUCCESS %s %s" % (candidate.name, repr(candidate.attrs)))
                        if id(candidate) not in new_context_ids:
                            # If a tag matches a selector more than once,
                            # don't include it in the context more than once.
                            new_context.append(candidate)
                            new_context_ids.add(id(candidate))
+                            if limit and len(new_context) >= limit:
+                                break
                    elif self._select_debug:
-                        print "     FAILURE %s %s" % (candidate.name, repr(candidate.attrs))
+                        print("     FAILURE %s %s" % (candidate.name, repr(candidate.attrs)))
            current_context = new_context
        if self._select_debug:
-            print "Final verdict:"
+            print("Final verdict:")
            for i in current_context:
-                print " %s %s" % (i.name, i.attrs)
+                print(" %s %s" % (i.name, i.attrs))
        return current_context
    # Old names for backwards compatibility
@@ -1439,7 +1553,7 @@ class SoupStrainer(object):
            else:
                attrs = kwargs
        normalized_attrs = {}
-        for key, value in attrs.items():
+        for key, value in list(attrs.items()):
            normalized_attrs[key] = self._normalize_search_value(value)
        self.attrs = normalized_attrs
@@ -1448,7 +1562,7 @@ class SoupStrainer(object):
    def _normalize_search_value(self, value):
        # Leave it alone if it's a Unicode string, a callable, a
        # regular expression, a boolean, or None.
-        if (isinstance(value, unicode) or callable(value) or hasattr(value, 'match')
+        if (isinstance(value, str) or isinstance(value, collections.Callable) or hasattr(value, 'match')
            or isinstance(value, bool) or value is None):
            return value
@@ -1461,7 +1575,7 @@ class SoupStrainer(object):
            new_value = []
            for v in value:
                if (hasattr(v, '__iter__') and not isinstance(v, bytes)
-                    and not isinstance(v, unicode)):
+                    and not isinstance(v, str)):
                    # This is almost certainly the user's mistake. In the
                    # interests of avoiding infinite loops, we'll let
                    # it through as-is rather than doing a recursive call.
@@ -1473,7 +1587,7 @@ class SoupStrainer(object):
        # Otherwise, convert it into a Unicode string.
        # The unicode(str()) thing is so this will do the same thing on Python 2
        # and Python 3.
-        return unicode(str(value))
+        return str(str(value))
    def __str__(self):
        if self.text:
@@ -1527,7 +1641,7 @@ class SoupStrainer(object):
        found = None
        # If given a list of items, scan it for a text element that
        # matches.
-        if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, basestring)):
+        if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, str)):
            for element in markup:
                if isinstance(element, NavigableString) \
                       and self.search(element):
@@ -1540,7 +1654,7 @@ class SoupStrainer(object):
                found = self.search_tag(markup)
        # If it's text, make sure the text matches.
        elif isinstance(markup, NavigableString) or \
-                 isinstance(markup, basestring):
+                 isinstance(markup, str):
            if not self.name and not self.attrs and self._matches(markup, self.text):
                found = markup
        else:
@@ -1554,7 +1668,7 @@ class SoupStrainer(object):
        if isinstance(markup, list) or isinstance(markup, tuple):
            # This should only happen when searching a multi-valued attribute
            # like 'class'.
-            if (isinstance(match_against, unicode)
+            if (isinstance(match_against, str)
                and ' ' in match_against):
                # A bit of a special case. If they try to match "foo
                # bar" on a multivalue attribute's value, only accept
@@ -1589,7 +1703,7 @@ class SoupStrainer(object):
            # None matches None, False, an empty string, an empty list, and so on.
            return not match_against
-        if isinstance(match_against, unicode):
+        if isinstance(match_against, str):
            # Exact string match
            return markup == match_against