File: //proc/1233/cwd/home/arjun/projects/aigenerator/venv/lib/python3.12/site-packages/cairosvg/url.py
"""
Utils dealing with URLs.
"""
import os.path
import re
from pathlib import Path
from urllib.parse import urljoin, urlparse
from urllib.request import Request, urlopen
from . import VERSION
# Headers attached to every request built by ``fetch``: the User-Agent
# identifies the library together with its version.
HTTP_HEADERS = {'User-Agent': f'CairoSVG {VERSION}'}
# Matches a CSS-like ``url(...)`` wrapper and captures what is between the
# parentheses. The capture is greedy, so it extends to the last ``)``.
URL = re.compile(r'url\((.+)\)')
def normalize_url(url):
    """Normalize ``url`` for underlying NT/Unix operating systems.

    The input ``url`` may look like the following:

        - C:\\Directory\\zzz.svg
        - file://C:\\Directory\\zzz.svg
        - zzz.svg

    The output ``url`` on NT systems would look like below:

        - file:///C:/Directory/zzz.svg

    Empty values, ``data:`` URLs and every URL on non-NT systems are
    returned unchanged. On NT systems, inputs that are neither an absolute
    path nor a ``file://<drive>:`` URL (for example the relative
    ``zzz.svg``) are returned unchanged as well.

    """
    if not url or os.name != 'nt' or url.startswith('data:'):
        return url

    if not url.startswith('file:') and os.path.isabs(url):
        # Absolute filesystem path such as C:\Directory\zzz.svg.
        url = os.path.abspath(url)
        # Keep a trailing ``#fragment`` out of the path-to-URI conversion
        # and append it again afterwards.
        if '#' in url:
            url, part = url.rsplit('#', 1)
        else:
            part = None
        url = Path(url).resolve().as_uri()
        if part is not None:
            url = url + '#' + part
    elif re.match('file://[a-z]:', url, re.IGNORECASE):
        # ``file://C:\Directory\zzz.svg``: only the ``//`` that follows the
        # scheme must become ``///``. Limiting the replacement to the first
        # occurrence leaves any later ``//`` in the path or fragment alone.
        url = url.replace('//', '///', 1)
        url = url.replace('\\', '/')

    return url
def nt_compatible_path(path):
    """Provide compatible NT file paths for ``os.path`` functions.

    ``os.path`` expects NT paths with no ``/`` at the beginning. For
    example, ``/C:/Directory/zzz.svg`` would fail ``os.path.isfile()``,
    ``os.path.isdir()`` etc. where the expected input for ``os.path``
    functions is ``C:/Directory/zzz.svg``.

    Currently ``nt_compatible_path`` performs some basic checks and
    eliminates the unwanted ``/`` at the beginning. On non-NT systems, or
    when ``path`` does not start with ``/<drive letter>:/``, ``path`` is
    returned unchanged.

    """
    if os.name == 'nt' and re.match('/[a-z]:/', path, re.IGNORECASE):
        # Drop exactly the leading slash. A plain slice avoids ``re.sub``,
        # whose fourth positional argument is ``count`` rather than
        # ``flags``.
        return path[1:]
    return path
def fetch(url, resource_type):
    """Fetch the content of ``url``.

    ``resource_type`` is the mimetype of the resource (currently one of
    image/*, image/svg+xml, text/css). It is part of the fetcher signature
    but is not used by this implementation.

    The request is sent with ``HTTP_HEADERS`` and the whole response body
    is returned as bytes. No check is made on the URL here; callers that
    need restrictions should use a different fetcher such as
    ``safe_fetch``.

    """
    request = Request(url, headers=HTTP_HEADERS)
    # Close the response as soon as the body is read instead of leaving the
    # underlying handle open until garbage collection.
    with urlopen(request) as response:
        return response.read()
def safe_fetch(url, resource_type):
    """Fetch ``url`` only when it is a data-URL.

    Any other value (including an empty or missing URL) is not fetched; a
    minimal 1x1 empty SVG document is returned in its place.

    """
    if not url or not url.startswith('data:'):
        # Anything that is not inline data gets the placeholder document.
        return b'<svg width="1" height="1"></svg>'
    return fetch(url, resource_type)
def parse_url(url, base=None):
    """Parse an URL and return the ``urlparse`` result.

    ``url`` may be wrapped in a ``url()`` string, in which case only the
    wrapped part is used. When ``base`` is given, the "folder" part of it
    is prepended to the URL. Surrounding quotes are stripped and the result
    goes through ``normalize_url`` before being parsed.

    An empty or missing ``url`` yields the parse result of ``''``.

    """
    if not url:
        return urlparse('')

    wrapped = URL.search(url)
    if wrapped:
        url = wrapped.group(1)

    if base:
        base_parts = urlparse(base)
        url_parts = urlparse(url)
        local_schemes = ('', 'file')
        if base_parts.scheme in local_schemes:
            if url_parts.scheme in local_schemes:
                # Both `base` and `url` are file-like: combine them with
                # filesystem path operations.
                base_path = nt_compatible_path(base_parts.path)
                relative_path = nt_compatible_path(url_parts.path)
                if os.path.isfile(base_path):
                    # `os.path.join` keeps the file name of `base`, so
                    # only its directory is used when joining.
                    url = (
                        os.path.join(
                            os.path.dirname(base_path), relative_path)
                        if relative_path else base_path)
                elif os.path.isdir(base_path) and relative_path:
                    url = os.path.join(base_path, relative_path)
                else:
                    # Base is a directory with nothing to append, or is
                    # neither an existing file nor an existing directory.
                    url = ''
                if url_parts.fragment:
                    url = f'{url}#{url_parts.fragment}'
        elif url_parts.scheme in ('', base_parts.scheme):
            # `urljoin` takes care of using the "folder" part of `base`.
            url = urljoin(base, url)

    url = normalize_url(url.strip('\'"'))
    return urlparse(url or '')
def read_url(url, url_fetcher, resource_type):
    """Get bytes in a parsed ``url`` using ``url_fetcher``.

    ``url`` is a parsed URL object providing ``scheme`` and ``geturl()``.
    When it has no scheme it is treated as a local path: it is made
    absolute, prefixed with ``file://`` and passed through
    ``normalize_url``.

    ``url_fetcher`` is called as ``url_fetcher(url_string, resource_type)``
    and its result is returned. If ``url_fetcher`` is None the default (no
    limitations) ``fetch`` function is used.

    """
    if url_fetcher is None:
        # Honour the documented default instead of failing on ``None(...)``.
        url_fetcher = fetch

    if url.scheme:
        target = url.geturl()
    else:
        target = normalize_url(f'file://{os.path.abspath(url.geturl())}')

    return url_fetcher(target, resource_type)