Skip to content
Snippets Groups Projects
Commit 96e355bb authored by HeJ's avatar HeJ
Browse files

core: extract markdown.MyHtmlRenderer.handle_link() into utils.resolve_link()

parent 9cdff1b5
Branches
Tags
No related merge requests found
import html
import re
from urllib.parse import quote, urlparse
import bleach
import mistletoe
......@@ -8,7 +7,6 @@ from mistletoe.block_token import BlockToken
from mistletoe.html_renderer import HTMLRenderer
from mistletoe.span_token import AutoLink, Link, SpanToken, tokenize_inner
from django.conf import settings
from django.db.models import Model
from django.urls import NoReverseMatch, reverse
from django.utils.safestring import mark_safe
......@@ -17,24 +15,13 @@ from modeltranslation.fields import build_localized_fieldname
from modeltranslation.settings import AVAILABLE_LANGUAGES
from .models import conference
from .utils import scheme_and_netloc_from_url, url_in_allowlist
from .utils import resolve_link
def markdown_header_slugify(value: str, separator: str) -> str:
    """Return the slug for a markdown header: the slugified *value* prefixed with ``md-``.

    :param value: the header text to slugify
    :param separator: accepted as part of the signature; not used by this implementation
    :return: the prefixed slug
    """
    slug = slugify(value)
    return f'md-{slug}'
def is_trusted_dst(url: str) -> bool:
    """Tell whether *url* may be linked directly (i.e. without going through the dereferer).

    A destination is trusted when it

    * points to a host listed in ``settings.ALLOWED_HOSTS``, or
    * carries neither a scheme nor a host (a relative reference), or
    * uses an explicit scheme other than http, https, ftp or ftps.

    Protocol-relative URLs (``//host/path``) pointing to a non-local host are
    *not* trusted: they have no scheme of their own, so a browser resolves them
    with the scheme of the embedding page and they end up on an external site.

    :param url: the link destination to classify
    :return: True if the destination is considered trusted, False otherwise
    """
    parsed = urlparse(url)
    is_local_domain = parsed.netloc in settings.ALLOWED_HOSTS

    # local hosts and plain relative references never leave the site
    if is_local_domain or not (parsed.scheme or parsed.netloc):
        return True

    # foreign host without an explicit scheme: protocol-relative URL, treat like http(s)
    if not parsed.scheme:
        return False

    return parsed.scheme not in {'http', 'https', 'ftp', 'ftps'}
def redirect_via_dereferer(url: str):
    """Return the dereferer URL for *url*.

    The target is percent-quoted and substituted for the ``quoted_target``
    placeholder of ``settings.PLAINUI_DEREFERER_URL``.

    :param url: the original link destination
    :return: the formatted dereferer URL
    """
    dereferer_template = settings.PLAINUI_DEREFERER_URL
    quoted_target = quote(url)
    return dereferer_template.format(quoted_target=quoted_target)
class PageLink(SpanToken):
pattern = re.compile(r'\[\[ *([^|\]]+?) *(?:\| *(.*))? *\]\]')
parse_group = 2
......@@ -109,46 +96,17 @@ class MyHtmlRenderer(HTMLRenderer):
result += '</div>\n'
return result
def __init__(self, conf: 'conference.Conference', result: 'RenderResult', *extras, derefer_allowlist: bool = True, **kwargs):
def __init__(self, conf: 'conference.Conference', result: 'RenderResult', *extras, use_derefer_allowlist: bool = True, **kwargs):
self.conf = conf
self.result = result
self.derefer_allowlist = derefer_allowlist
self.use_derefer_allowlist = use_derefer_allowlist
super().__init__(PageLink, ProfileLink, Tag, AlertBlock, *extras, **kwargs)
def derive_link_target(self, url):
    """Return *url* unchanged if it is allowlisted, otherwise its dereferer redirect.

    The global allowlist (``settings.DEREFERRER_GLOBAL_ALLOWLIST``) is only
    consulted while ``self.derefer_allowlist`` is false; in every other case
    the URL is rewritten via ``redirect_via_dereferer()``.
    """
    if not self.derefer_allowlist:
        try:
            origin = scheme_and_netloc_from_url(url)
            if url_in_allowlist(origin, settings.DEREFERRER_GLOBAL_ALLOWLIST):
                # allowlisted destinations are linked directly
                return url
        except ValueError:
            # a ValueError from the lookup is ignored: the URL is dereferred below
            pass

    return redirect_via_dereferer(url)
def handle_link(self, url: str) -> tuple[str, str]:
    """Classify *url* as internal or external and return the link to render.

    :param url: the link destination as written in the markup
    :return: ``('internal', url)`` for resolved internal or trusted links,
        ``('external', target)`` with the result of ``derive_link_target()`` otherwise
    """
    from .utils import resolve_internal_url

    internal_url = resolve_internal_url(url, accept_http_https=False, fallback_as_is=False)

    if internal_url is None:
        # nothing internal was resolved: untrusted destinations are external links
        if not is_trusted_dst(url):
            return 'external', self.derive_link_target(url)
        return 'internal', url

    # a falsy (but not None) resolver result leaves the original URL in place
    return 'internal', internal_url if internal_url else url
def render_link(self, token: Link) -> str:
if token.target.startswith(('javascript:', 'data:')):
token.target = ''
link_type, url = self.handle_link(token.target)
link_type, url = resolve_link(token.target, self.use_derefer_allowlist)
self.result.linked_urls.add(url)
template = '<a href="{target}"{title} class="{link_type}">{inner}</a>'
......@@ -269,7 +227,7 @@ def render_markdown_ex(
result = RenderResult()
with renderer(conf, result) as renderer:
renderer.derefer_allowlist = not dont_derefer_allowlist
renderer.use_derefer_allowlist = dont_derefer_allowlist
rendered_markup = renderer.render(mistletoe.Document(markup))
if sanitize_html:
......
......@@ -19,6 +19,7 @@ class MarkdownTest(TestCase):
conf = Conference(name='foo', id=TEST_CONF_ID)
conf.save()
# TODO: consider moving this test into tests/utils.py as this is basically testing resolve_link() only (except the footnote/anchored links part)
tests = [
('https://localhost/', False),
('https://localhost/foo', False),
......
......@@ -8,7 +8,7 @@ import uuid
from datetime import UTC, datetime, timedelta
from pathlib import Path
from string import ascii_letters, digits
from urllib.parse import parse_qs, urlparse, urlunparse
from urllib.parse import parse_qs, quote, urlparse, urlunparse
import requests
......@@ -213,6 +213,49 @@ def resolve_internal_url(url: str, accept_http_https: bool = True, fallback_as_i
return url if fallback_as_is else None
def get_dereferred_url(url: str, use_derefer_allowlist: bool = True):
    """Return *url* rewritten to the dereferer, unless the allowlist exempts it.

    :param url: the original link destination
    :param use_derefer_allowlist: if true, a URL matching ``settings.DEREFERRER_GLOBAL_ALLOWLIST``
        is returned unchanged instead of being dereferred
    :return: either *url* itself or ``settings.PLAINUI_DEREFERER_URL`` formatted with the quoted URL
    """
    if use_derefer_allowlist:
        try:
            origin = scheme_and_netloc_from_url(url)
            if url_in_allowlist(origin, settings.DEREFERRER_GLOBAL_ALLOWLIST):
                # allowlisted destinations are linked directly
                return url
        except ValueError:
            # a ValueError from the lookup is ignored: the URL is dereferred below
            pass

    return settings.PLAINUI_DEREFERER_URL.format(quoted_target=quote(url))
def is_trusted_link_destination(url: str) -> bool:
    """Tell whether *url* may be linked directly (i.e. without going through the dereferer).

    A destination is trusted when it

    * points to a host listed in ``settings.ALLOWED_HOSTS``, or
    * carries neither a scheme nor a host (a relative reference), or
    * uses an explicit scheme other than http, https, ftp or ftps.

    Protocol-relative URLs (``//host/path``) pointing to a non-local host are
    *not* trusted: they have no scheme of their own, so a browser resolves them
    with the scheme of the embedding page and they end up on an external site.

    :param url: the link destination to classify
    :return: True if the destination is considered trusted, False otherwise
    """
    parsed = urlparse(url)
    is_local_domain = parsed.netloc in settings.ALLOWED_HOSTS

    # local hosts and plain relative references never leave the site
    if is_local_domain or not (parsed.scheme or parsed.netloc):
        return True

    # foreign host without an explicit scheme: protocol-relative URL, treat like http(s)
    if not parsed.scheme:
        return False

    return parsed.scheme not in {'http', 'https', 'ftp', 'ftps'}
def resolve_link(url: str, use_derefer_allowlist: bool = True) -> tuple[str, str]:
    """
    Resolve a URL, classify it as internal or external and, for external links, rewrite it via the dereferrer.

    :param url: the original URL to resolve
    :param use_derefer_allowlist: passed on to get_dereferred_url(); controls whether the global
        allowlist (see settings.DEREFERRER_GLOBAL_ALLOWLIST) is consulted
    :return: tuple of two values: 'internal' or 'external', followed by the resolved link
        (for external links this may point to the dereferrer)
    """
    internal_url = resolve_internal_url(url, accept_http_https=False, fallback_as_is=False)

    if internal_url is None:
        # nothing internal was resolved: untrusted destinations are external links
        if not is_trusted_link_destination(url):
            return 'external', get_dereferred_url(url, use_derefer_allowlist=use_derefer_allowlist)
        return 'internal', url

    # a falsy (but not None) resolver result leaves the original URL in place
    return 'internal', internal_url if internal_url else url
def download_from_url(url: str) -> tuple[str, bytes]:
# let requests library fetch the URL
r = requests.get(url, timeout=30)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment