Skip to content

Commit 87b6921

Browse files
authored
Merge pull request #188 from American-Institutes-for-Research/HEA-764/Add-an-AssetDownloadView-to-Django
Hea 764/add an asset download view to django
2 parents 0e3d51c + 8494ef3 commit 87b6921

5 files changed

Lines changed: 99 additions & 6 deletions

File tree

apps/common/views.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
import os
22

3+
import fsspec
4+
from dagster import AssetKey, DagsterEventType, DagsterInstance, EventRecordsFilter
35
from django.contrib.auth.mixins import LoginRequiredMixin, PermissionRequiredMixin
6+
from django.http import Http404, HttpResponseRedirect
7+
from django.views import View
48
from revproxy.views import ProxyView
59

610

@@ -9,3 +13,45 @@ class DagsterProxyView(LoginRequiredMixin, PermissionRequiredMixin, ProxyView):
913
upstream = f"{os.environ.get('DAGSTER_WEBSERVER_URL')}/{os.environ.get('DAGSTER_WEBSERVER_PREFIX')}/"
1014
permission_required = "common.access_dagster_ui"
1115
raise_exception = False
16+
17+
18+
class AssetDownloadView(LoginRequiredMixin, PermissionRequiredMixin, View):
19+
"""
20+
A view that generates the s3 url for direct download of materialized assets
21+
"""
22+
23+
permission_required = "common.access_dagster_ui"
24+
login_url = "/admin/login/"
25+
raise_exception = False
26+
27+
def get(self, request, asset_name, partition_name=None):
28+
try:
29+
with DagsterInstance.get() as instance:
30+
filter_kwargs = {
31+
"event_type": DagsterEventType.ASSET_MATERIALIZATION,
32+
"asset_key": AssetKey([asset_name]),
33+
}
34+
if partition_name:
35+
filter_kwargs["asset_partitions"] = [partition_name]
36+
37+
records = list(instance.get_event_records(EventRecordsFilter(**filter_kwargs), limit=1))
38+
if not records:
39+
raise Http404(f"No materialization found for asset '{asset_name}'")
40+
41+
metadata = records[0].asset_materialization.metadata or {}
42+
asset_uri = metadata.get("path")
43+
if not asset_uri:
44+
raise Http404(f"Asset '{asset_name}' does not contain a 'path' in metadata")
45+
46+
# Generate signed URL for S3
47+
protocol, path = asset_uri.value.split("://", 1)
48+
fs = fsspec.filesystem(protocol)
49+
50+
if not hasattr(fs, "sign"):
51+
raise NotImplementedError(f"Filesystem '{protocol}' does not support pre-signed URLs")
52+
53+
signed_url = fs.sign(path)
54+
return HttpResponseRedirect(signed_url)
55+
56+
except Exception as e:
57+
raise Http404(f"Failed to locate or sign asset '{asset_name}': {str(e)}")

docker-compose.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ services:
111111
SUPPORT_EMAIL_ADDRESS: ${SUPPORT_EMAIL_ADDRESS}
112112
DJANGO_MIGRATE: 1
113113
GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS}
114+
BASELINE_EXPLORER_API_ROOT_URL: ${BASELINE_EXPLORER_API_ROOT_URL}
114115
command:
115116
- --timeout=3600
116117
- --workers=12

env.example

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,7 @@ BSS_FILES_STORAGE_OPTIONS='{"token": "service_account", "access": "read_only", "
5454
# LAUNCHER can be used to configure a wrapper program around the Python process
5555
# For example, to add ddtrace or debugpy
5656
# Use the VSCode debugger as a launcher
57-
# LAUNCHER = "python3 -m debugpy --listen 0.0.0.0:5679"
57+
# LAUNCHER = "python3 -m debugpy --listen 0.0.0.0:5679"
58+
59+
# Baseline Explorer API Root URL
60+
BASELINE_EXPLORER_API_ROOT_URL=http://localhost:8000

hea/urls.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
WealthGroupViewSet,
5050
WildFoodGatheringViewSet,
5151
)
52-
from common.views import DagsterProxyView
52+
from common.views import AssetDownloadView, DagsterProxyView
5353
from common.viewsets import (
5454
ClassifiedProductViewSet,
5555
CountryViewSet,
@@ -141,8 +141,15 @@
141141
name="livelihood-zone-baseline-faceted-search",
142142
),
143143
re_path(os.environ.get("DAGSTER_WEBSERVER_PREFIX", "pipelines") + r"/(?P<path>.*)", DagsterProxyView.as_view()),
144+
path("assetdownload/<str:asset_name>/", AssetDownloadView.as_view(), name="asset_download"),
145+
path(
146+
"assetdownload/<str:asset_name>/<str:partition_name>/",
147+
AssetDownloadView.as_view(),
148+
name="asset_download_partitioned",
149+
),
144150
]
145151

152+
146153
# Django's solution for translating JavaScript apps
147154
# Provides gettext translation functionality for Javascript clients (and ngettext, pgettext, iterpolate, etc.)
148155
# See: https://docs.djangoproject.com/en/4.2/topics/i18n/translation/#using-the-javascript-translation-catalog

pipelines/resources.py

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import json
2+
import os
23
import pickle
34
from abc import abstractmethod
45
from typing import Any, Optional, Type
6+
from urllib.parse import urljoin
57

68
import dagster._check as check
79
import pandas as pd
@@ -10,15 +12,49 @@
1012
DagsterInvariantViolationError,
1113
InitResourceContext,
1214
InputContext,
15+
MetadataValue,
1316
OutputContext,
1417
)
1518
from dagster._core.storage.upath_io_manager import UPathIOManager
1619
from dagster._utils import PICKLE_PROTOCOL
20+
from django.urls import reverse
1721
from pydantic.fields import Field
1822
from upath import UPath
1923

2024

21-
class PickleFilesystemIOManager(UPathIOManager):
25+
class DownloadAssetMixin:
26+
"""
27+
Mixin that adds download URL of the asset to the metadata.
28+
"""
29+
30+
def get_download_url(self, context: OutputContext) -> str:
31+
if not context or not getattr(context, "has_asset_key", False) or not context.has_asset_key:
32+
return ""
33+
34+
asset_name = context.asset_key.path[-1]
35+
root_url = os.getenv("BASELINE_EXPLORER_API_ROOT_URL", "http://localhost:8000")
36+
37+
kwargs = {"asset_name": asset_name}
38+
if context.has_partition_key:
39+
kwargs["partition_name"] = context.partition_key
40+
relative_url = reverse("asset_download_partitioned", kwargs=kwargs)
41+
else:
42+
relative_url = reverse("asset_download", kwargs=kwargs)
43+
44+
return urljoin(root_url, relative_url)
45+
46+
def handle_output(self, context: OutputContext, obj):
47+
download_url = self.get_download_url(context)
48+
if download_url:
49+
context.add_output_metadata(
50+
{
51+
"download": MetadataValue.url(download_url),
52+
}
53+
)
54+
super().handle_output(context, obj)
55+
56+
57+
class PickleFilesystemIOManager(DownloadAssetMixin, UPathIOManager):
2258
"""
2359
Dagster I/O Manager that serializes Python objects to files using Pickle.
2460
@@ -57,7 +93,7 @@ def load_from_path(self, context: InputContext, path: "UPath") -> Any:
5793
return pickle.load(file)
5894

5995

60-
class JSONFilesystemIOManager(UPathIOManager):
96+
class JSONFilesystemIOManager(DownloadAssetMixin, UPathIOManager):
6197
"""
6298
Dagster I/O Manager that serializes Python objects to JSON files.
6399
"""
@@ -77,7 +113,7 @@ def load_from_path(self, context: InputContext, path: "UPath") -> Any:
77113
return json.loads(file.read())
78114

79115

80-
class DataFrameCSVFilesystemIOManager(UPathIOManager):
116+
class DataFrameCSVFilesystemIOManager(DownloadAssetMixin, UPathIOManager):
81117
"""
82118
Dagster I/O Manager that serializes DataFrames to CSV files.
83119
"""
@@ -95,7 +131,7 @@ def load_from_path(self, context: InputContext, path: UPath) -> pd.DataFrame:
95131
return pd.read_csv(path)
96132

97133

98-
class DataFrameExcelFilesystemIOManager(UPathIOManager):
134+
class DataFrameExcelFilesystemIOManager(DownloadAssetMixin, UPathIOManager):
99135
"""
100136
Dagster I/O Manager that serializes DataFrames to Excel .xlsx using the OpenPyXL engine.
101137

0 commit comments

Comments
 (0)