Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
H
hub
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Package registry
Operate
Terraform modules
Analyze
Contributor analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
thomasDOTwtf
hub
Commits
96e355bb
Commit
96e355bb
authored
5 months ago
by
HeJ
Browse files
Options
Downloads
Patches
Plain Diff
core: extract markdown.MyHtmlRenderer.handle_link() into utils.resolve_link()
parent
9cdff1b5
Branches
Branches containing commit
Tags
Tags containing commit
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/core/markdown.py
+5
-47
5 additions, 47 deletions
src/core/markdown.py
src/core/tests/markdown.py
+1
-0
1 addition, 0 deletions
src/core/tests/markdown.py
src/core/utils.py
+44
-1
44 additions, 1 deletion
src/core/utils.py
with
50 additions
and
48 deletions
src/core/markdown.py
+
5
−
47
View file @
96e355bb
import
html
import
re
from
urllib.parse
import
quote
,
urlparse
import
bleach
import
mistletoe
...
...
@@ -8,7 +7,6 @@ from mistletoe.block_token import BlockToken
from
mistletoe.html_renderer
import
HTMLRenderer
from
mistletoe.span_token
import
AutoLink
,
Link
,
SpanToken
,
tokenize_inner
from
django.conf
import
settings
from
django.db.models
import
Model
from
django.urls
import
NoReverseMatch
,
reverse
from
django.utils.safestring
import
mark_safe
...
...
@@ -17,24 +15,13 @@ from modeltranslation.fields import build_localized_fieldname
from
modeltranslation.settings
import
AVAILABLE_LANGUAGES
from
.models
import
conference
from
.utils
import
scheme_and_netloc_from_url
,
url_in_allowlist
from
.utils
import
resolve_link
def
markdown_header_slugify
(
value
:
str
,
separator
:
str
)
->
str
:
return
'
md-
'
+
slugify
(
value
)
def
is_trusted_dst
(
url
:
str
):
url
=
urlparse
(
url
)
is_local_domain
=
url
.
netloc
in
settings
.
ALLOWED_HOSTS
is_external
=
(
url
.
scheme
or
url
.
netloc
)
and
not
is_local_domain
return
not
is_external
or
url
.
scheme
not
in
{
'
http
'
,
'
https
'
,
'
ftp
'
,
'
ftps
'
}
def
redirect_via_dereferer
(
url
:
str
):
return
settings
.
PLAINUI_DEREFERER_URL
.
format
(
quoted_target
=
quote
(
url
))
class
PageLink
(
SpanToken
):
pattern
=
re
.
compile
(
r
'
\[\[ *([^|\]]+?) *(?:\| *(.*))? *\]\]
'
)
parse_group
=
2
...
...
@@ -109,46 +96,17 @@ class MyHtmlRenderer(HTMLRenderer):
result
+=
'
</div>
\n
'
return
result
def
__init__
(
self
,
conf
:
'
conference.Conference
'
,
result
:
'
RenderResult
'
,
*
extras
,
derefer_allowlist
:
bool
=
True
,
**
kwargs
):
def
__init__
(
self
,
conf
:
'
conference.Conference
'
,
result
:
'
RenderResult
'
,
*
extras
,
use_
derefer_allowlist
:
bool
=
True
,
**
kwargs
):
self
.
conf
=
conf
self
.
result
=
result
self
.
derefer_allowlist
=
derefer_allowlist
self
.
use_
derefer_allowlist
=
use_
derefer_allowlist
super
().
__init__
(
PageLink
,
ProfileLink
,
Tag
,
AlertBlock
,
*
extras
,
**
kwargs
)
def
derive_link_target
(
self
,
url
):
"""
Rewrite the given URL to point at the dereferrer, unless allowlisted URLs are exempt from dereferring and the URL's scheme and netloc are in the dereferrer allowlist (then it is returned unchanged).
"""
do_derefer
=
True
if
not
self
.
derefer_allowlist
:
try
:
scheme_and_netloc
=
scheme_and_netloc_from_url
(
url
)
if
url_in_allowlist
(
scheme_and_netloc
,
settings
.
DEREFERRER_GLOBAL_ALLOWLIST
):
do_derefer
=
False
except
ValueError
:
# ignore URL parsing error
pass
return
redirect_via_dereferer
(
url
)
if
do_derefer
else
url
def
handle_link
(
self
,
url
:
str
)
->
tuple
[
str
,
str
]:
from
.utils
import
resolve_internal_url
# attempt resolving an internal URL
if
resolved_internal_url
:
=
resolve_internal_url
(
url
,
accept_http_https
=
False
,
fallback_as_is
=
False
):
url
=
resolved_internal_url
# derive external link (i.e. apply dereferer), if it's not an internal or trusted location
if
resolved_internal_url
is
None
and
not
is_trusted_dst
(
url
):
return
'
external
'
,
self
.
derive_link_target
(
url
)
# otherwise, it's an internal link
return
'
internal
'
,
url
def
render_link
(
self
,
token
:
Link
)
->
str
:
if
token
.
target
.
startswith
((
'
javascript:
'
,
'
data:
'
)):
token
.
target
=
''
link_type
,
url
=
self
.
handl
e_link
(
token
.
target
)
link_type
,
url
=
resolv
e_link
(
token
.
target
,
self
.
use_derefer_allowlist
)
self
.
result
.
linked_urls
.
add
(
url
)
template
=
'
<a href=
"
{target}
"
{title} class=
"
{link_type}
"
>{inner}</a>
'
...
...
@@ -269,7 +227,7 @@ def render_markdown_ex(
result
=
RenderResult
()
with
renderer
(
conf
,
result
)
as
renderer
:
renderer
.
derefer_allowlist
=
not
dont_derefer_allowlist
renderer
.
use_
derefer_allowlist
=
dont_derefer_allowlist
rendered_markup
=
renderer
.
render
(
mistletoe
.
Document
(
markup
))
if
sanitize_html
:
...
...
This diff is collapsed.
Click to expand it.
src/core/tests/markdown.py
+
1
−
0
View file @
96e355bb
...
...
@@ -19,6 +19,7 @@ class MarkdownTest(TestCase):
conf
=
Conference
(
name
=
'
foo
'
,
id
=
TEST_CONF_ID
)
conf
.
save
()
# TODO: consider moving this test into tests/utils.py as this is basically testing resolve_link() only (except the footnote/anchored links part)
tests
=
[
(
'
https://localhost/
'
,
False
),
(
'
https://localhost/foo
'
,
False
),
...
...
This diff is collapsed.
Click to expand it.
src/core/utils.py
+
44
−
1
View file @
96e355bb
...
...
@@ -8,7 +8,7 @@ import uuid
from
datetime
import
UTC
,
datetime
,
timedelta
from
pathlib
import
Path
from
string
import
ascii_letters
,
digits
from
urllib.parse
import
parse_qs
,
urlparse
,
urlunparse
from
urllib.parse
import
parse_qs
,
quote
,
urlparse
,
urlunparse
import
requests
...
...
@@ -213,6 +213,49 @@ def resolve_internal_url(url: str, accept_http_https: bool = True, fallback_as_i
return
url
if
fallback_as_is
else
None
def
get_dereferred_url
(
url
:
str
,
use_derefer_allowlist
:
bool
=
True
):
"""
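Rewrite the given URL to point at the dereferrer, unless the allowlist is in use and the URL's scheme and netloc are in the dereferrer allowlist (then it is returned unchanged). Whether the URL is trusted is decided by the caller.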
"""
do_derefer
=
True
if
use_derefer_allowlist
:
try
:
scheme_and_netloc
=
scheme_and_netloc_from_url
(
url
)
if
url_in_allowlist
(
scheme_and_netloc
,
settings
.
DEREFERRER_GLOBAL_ALLOWLIST
):
do_derefer
=
False
except
ValueError
:
# ignore URL parsing error
pass
return
settings
.
PLAINUI_DEREFERER_URL
.
format
(
quoted_target
=
quote
(
url
))
if
do_derefer
else
url
def
is_trusted_link_destination
(
url
:
str
):
url
=
urlparse
(
url
)
is_local_domain
=
url
.
netloc
in
settings
.
ALLOWED_HOSTS
is_external
=
(
url
.
scheme
or
url
.
netloc
)
and
not
is_local_domain
return
not
is_external
or
url
.
scheme
not
in
{
'
http
'
,
'
https
'
,
'
ftp
'
,
'
ftps
'
}
def
resolve_link
(
url
:
str
,
use_derefer_allowlist
:
bool
=
True
)
->
tuple
[
str
,
str
]:
"""
Resolves a given URL, classifies it as internal or external and optionally rewrites it to use the dereferrer.
:param url: the original URL to resolve
:param use_derefer_allowlist: controls if the global allowlist (see settings.DEREFERRER_GLOBAL_ALLOWLIST) shall be used
:return: tuple with two values, the first being either
'
internal
'
or
'
external
'
, the second being the resolved link (might be to the dereferrer)
"""
# attempt resolving an internal URL
if
resolved_internal_url
:
=
resolve_internal_url
(
url
,
accept_http_https
=
False
,
fallback_as_is
=
False
):
url
=
resolved_internal_url
# derive external link (i.e. apply dereferer), if it's not an internal or trusted location
if
resolved_internal_url
is
None
and
not
is_trusted_link_destination
(
url
):
return
'
external
'
,
get_dereferred_url
(
url
,
use_derefer_allowlist
=
use_derefer_allowlist
)
# otherwise, it's an internal link
return
'
internal
'
,
url
def
download_from_url
(
url
:
str
)
->
tuple
[
str
,
bytes
]:
# let requests library fetch the URL
r
=
requests
.
get
(
url
,
timeout
=
30
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment