Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove img_with_alt option entirely #200

Merged
merged 1 commit into from
Jul 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@ p.to_dict(filter_by_type="h-entry")
p.to_json(filter_by_type="h-entry")
```

## Experimental Features 🧪
## Breaking Changes in v2

- Pass the optional argument `img_with_alt=True` to either the `Parser` object or to the `parse` method to enable parsing of the `alt` attribute of `<img>` tags according to [issue: image alt text is lost during parsing](https://github.com/microformats/microformats2-parsing/issues/2). By default this is `False` to be backwards compatible.
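- Parsing of the `alt` attribute of `<img>` tags is now always enabled, and the `img_with_alt` option has been removed from both the `Parser` object and the `parse` method.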

## FAQs ❓

Expand Down
4 changes: 2 additions & 2 deletions mf2py/dom_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def get_attr(el, attr, check_name=None):
return el.get(attr)


def get_img_src_alt(img, img_with_alt, base_url=""):
def get_img_src_alt(img, base_url=""):
"""given an img element, returns a dict with the src under "value" and the alt text under "alt" if alt exists, else returns the src as a string
use for alt parsing with img
"""
Expand All @@ -50,7 +50,7 @@ def get_img_src_alt(img, img_with_alt, base_url=""):
if src is not None:
src = try_urljoin(base_url, src)

if alt is None or not img_with_alt:
if alt is None:
return src
else:
return {"value": src, "alt": alt}
Expand Down
7 changes: 3 additions & 4 deletions mf2py/implied_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,11 @@ def non_empty(val):
return get_textContent(el, replace_img=True, img_to_src=False, base_url=base_url)


def photo(el, img_with_alt, base_url=""):
def photo(el, base_url=""):
"""Find an implied photo property

Args:
el (bs4.element.Tag): a DOM element
img_with_alt: a flag to enable experimental parsing of alt attribute with img (set by the Parser object)
base_url (string): the base URL to use, to reconcile relative URLs

Returns:
Expand All @@ -105,7 +104,7 @@ def get_photo_child(children):
return poss_obj

# if element is an img use source if exists
prop_value = get_img_src_alt(el, img_with_alt, base_url)
prop_value = get_img_src_alt(el, base_url)
if prop_value is not None:
return prop_value

Expand All @@ -132,7 +131,7 @@ def get_photo_child(children):
# if a possible child was found parse
if poss_child is not None:
# img get src
prop_value = get_img_src_alt(poss_child, img_with_alt, base_url)
prop_value = get_img_src_alt(poss_child, base_url)
if prop_value is not None:
return prop_value

Expand Down
4 changes: 2 additions & 2 deletions mf2py/parse_property.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ def text(el, base_url=""):
return prop_value


def url(el, img_with_alt, base_url=""):
def url(el, base_url=""):
"""Process u-* properties"""

prop_value = get_attr(el, "href", check_name=("a", "area", "link"))
if prop_value is None:
prop_value = get_img_src_alt(el, img_with_alt, base_url)
prop_value = get_img_src_alt(el, base_url)
if prop_value is not None:
return prop_value
if prop_value is None:
Expand Down
19 changes: 5 additions & 14 deletions mf2py/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from .version import __version__


def parse(doc=None, url=None, html_parser=None, img_with_alt=False):
def parse(doc=None, url=None, html_parser=None):
"""
Parse a microformats2 document or url and return a json dictionary.

Expand All @@ -28,7 +28,7 @@ def parse(doc=None, url=None, html_parser=None, img_with_alt=False):

Return: a json dict representing the structured data in this document.
"""
return Parser(doc, url, html_parser, img_with_alt).to_dict()
return Parser(doc, url, html_parser).to_dict()


class Parser(object):
Expand All @@ -54,7 +54,7 @@ class Parser(object):
ua_url = "https://github.com/microformats/mf2py"
useragent = "{0} - version {1} - {2}".format(ua_desc, __version__, ua_url)

def __init__(self, doc=None, url=None, html_parser=None, img_with_alt=True):
def __init__(self, doc=None, url=None, html_parser=None):
self.__url__ = None
self.__doc__ = None
self._preserve_doc = False
Expand All @@ -68,7 +68,6 @@ def __init__(self, doc=None, url=None, html_parser=None, img_with_alt=True):
"version": __version__,
},
}
self.__img_with_alt__ = img_with_alt

# use default parser if none specified
self.__html_parser__ = html_parser or "html5lib"
Expand Down Expand Up @@ -194,11 +193,7 @@ def handle_microformat(
if "photo" not in properties and parsed_types_aggregation.isdisjoint(
"uh"
):
x = implied_properties.photo(
el,
self.__img_with_alt__,
base_url=self.__url__,
)
x = implied_properties.photo(el, base_url=self.__url__)
if x is not None:
properties["photo"] = [x]

Expand Down Expand Up @@ -304,11 +299,7 @@ def parse_props(el):

# if value has not been parsed then parse it
if u_value is None:
u_value = parse_property.url(
el,
self.__img_with_alt__,
base_url=self.__url__,
)
u_value = parse_property.url(el, base_url=self.__url__)

if root_class_names:
prop_value.append(
Expand Down
11 changes: 3 additions & 8 deletions test/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -852,20 +852,15 @@ def test_whitespace_with_tags_inside_property():
assert result["items"][0]["properties"] == {"name": ["foo bar"]}


# experimental features tests


def test_photo_with_alt():
"""Confirm that alt text in img is parsed with feature flag img_with_alt under as a u-* property and implied photo"""
"""Confirm that alt text in img is parsed as a u-* property and implied photo"""

path = "experimental/img_with_alt.html"
path = "img_with_alt.html"

# without flag
result = parse_fixture(path)

# experimental img_with_alt=True
with open(os.path.join(TEST_DIR, path)) as f:
exp_result = Parser(doc=f, html_parser="html5lib", img_with_alt=True).to_dict()
exp_result = Parser(doc=f, html_parser="html5lib").to_dict()

# simple img with u-*
assert "/photo.jpg" == result["items"][0]["properties"]["photo"][0]
Expand Down