Skip to content
Open
1 change: 1 addition & 0 deletions admin/base/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
re_path(r'^cedar_metadata_templates/', include('admin.cedar.urls', namespace='cedar_metadata_templates')),
re_path(r'^draft_registrations/', include('admin.draft_registrations.urls', namespace='draft_registrations')),
re_path(r'^files/', include('admin.files.urls', namespace='files')),
re_path(r'^share_reindex/', include('admin.share_reindex.urls', namespace='share_reindex')),
]),
),
]
Expand Down
Empty file added admin/share_reindex/__init__.py
Empty file.
9 changes: 9 additions & 0 deletions admin/share_reindex/urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from django.urls import re_path
from . import views

# Application namespace declared by this URL module.
app_name = 'admin'

# Routes for the SHARE reindex admin pages.
urlpatterns = [
    # Listing of resources served by FailedShareIndexedGuidList.
    re_path(
        r'^$',
        views.FailedShareIndexedGuidList.as_view(),
        name='list',
    ),
    # Bulk reindex endpoint; the single path segment is captured as
    # ``resource_type`` and handed to FailedShareIndexedGuidReindex.
    re_path(
        r'^(?P<resource_type>[^/]+)/$',
        views.FailedShareIndexedGuidReindex.as_view(),
        name='reindex-share-resource',
    ),
]
52 changes: 52 additions & 0 deletions admin/share_reindex/views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from urllib.parse import urlencode

from django.contrib.auth.mixins import PermissionRequiredMixin
from django.http import Http404
from django.shortcuts import redirect
from django.urls import reverse
from django.views.generic import ListView, View

from osf.models import Guid
from api.share.utils import get_not_indexed_guids_for_resource_with_no_indexed_guid, task__reindex_failed_or_not_indexed_resource_into_share

class FailedShareIndexedGuidList(PermissionRequiredMixin, ListView):
    """Paginated admin listing of resources that still need SHARE indexing.

    The resource type is selected with the ``type`` query parameter. Values
    that are not one of the supported types fall back to ``'projects'`` so a
    hand-edited URL cannot break the queryset lookup or the ``{% url %}``
    reversals in the template.
    """
    paginate_by = 25
    template_name = 'share_reindex/list.html'
    permission_required = 'osf.update_share_reindex'
    raise_exception = True
    model = Guid

    # Type used when ``type`` is missing or not recognised.
    _DEFAULT_RESOURCE_TYPE = 'projects'
    # URL name of the admin detail page for each supported resource type.
    # Its keys double as the list of supported types.
    _RESOURCE_DETAIL_URL_NAMES = {
        'users': 'users:user',
        'preprints': 'preprints:preprint',
        'registries': 'nodes:node',
        'projects': 'nodes:node',
        'files': 'files:file',
    }
    # URL name of the per-guid reindex endpoint. 'files' has no entry; the
    # template hides the per-row reindex button for files.
    _RESOURCE_REINDEX_URL_NAMES = {
        'users': 'users:reindex-share-user',
        'preprints': 'preprints:reindex-share-preprint',
        'registries': 'nodes:reindex-share-node',
        'projects': 'nodes:reindex-share-node',
    }

    def _get_resource_type(self):
        """Return the requested resource type, or the default if unsupported."""
        resource_type = self.request.GET.get('type', self._DEFAULT_RESOURCE_TYPE)
        if resource_type not in self._RESOURCE_DETAIL_URL_NAMES:
            return self._DEFAULT_RESOURCE_TYPE
        return resource_type

    def get_queryset(self):
        """Return the not-indexed resources for the selected resource type."""
        return get_not_indexed_guids_for_resource_with_no_indexed_guid(self._get_resource_type())

    def get_context_data(self, **kwargs):
        """Build the template context for the listing page.

        Adds ``items_to_index`` (the current page's rows), ``page``,
        ``selected_resource_type``, ``resource_detail``,
        ``resource_guid_reindex`` and ``share_reindex_message`` unless the
        caller already supplied a value for the key.
        """
        resource_type = self._get_resource_type()
        # ListView already paginates (paginate_by is set); reuse its page
        # instead of paginating a second time, which costs an extra COUNT.
        context = super().get_context_data(**kwargs)
        context.setdefault('items_to_index', context['object_list'])
        context.setdefault('page', context['page_obj'])
        context.setdefault('selected_resource_type', resource_type)
        context.setdefault('resource_detail', self._RESOURCE_DETAIL_URL_NAMES.get(resource_type))
        context.setdefault('resource_guid_reindex', self._RESOURCE_REINDEX_URL_NAMES.get(resource_type))
        # The reindex view redirects back here with ``status=indexing``.
        if self.request.GET.get('status') == 'indexing':
            status_msg = f'Reindex of {resource_type} started, please check later.'
        else:
            status_msg = ''
        context.setdefault('share_reindex_message', status_msg)
        return context


class FailedShareIndexedGuidReindex(PermissionRequiredMixin, View):
    """Queue a background SHARE reindex for one resource type.

    Only POST is handled. On success the admin is redirected to the listing
    page with ``status=indexing`` so it can show a confirmation message.
    """
    permission_required = 'osf.update_share_reindex'
    raise_exception = True

    # Resource types offered by the listing page. The URL pattern accepts any
    # path segment, so the value must be checked before a task is queued.
    _SUPPORTED_RESOURCE_TYPES = frozenset({'projects', 'preprints', 'registries', 'users', 'files'})

    def post(self, request, *args, **kwargs):
        """Enqueue the reindex task and redirect back to the listing.

        Raises:
            Http404: if the ``resource_type`` URL segment is not supported.
        """
        resource_type = self.kwargs.get('resource_type')
        if resource_type not in self._SUPPORTED_RESOURCE_TYPES:
            # Fail here instead of queueing a task that cannot resolve the
            # type while telling the admin that indexing has started.
            raise Http404(f'Unsupported resource type: {resource_type}')
        # Runs in the background. With the task's default chunk settings
        # (200 chunks of 500) a run presumably covers up to 100_000 resources.
        task__reindex_failed_or_not_indexed_resource_into_share.delay(resource_type)
        base_url = reverse('share_reindex:list')
        query_string = urlencode({'type': resource_type, 'status': 'indexing'})
        return redirect(f'{base_url}?{query_string}')
3 changes: 3 additions & 0 deletions admin/templates/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,9 @@
{% if perms.osf.change_cedarmetadatatemplate %}
<li><a href="{% url 'cedar_metadata_templates:list' %}"><i class='fa fa-link'></i> <span>Cedar Metadata Templates</span></a></li>
{% endif %}
{% if perms.osf.update_share_reindex %}
<li><a href="{% url 'share_reindex:list' %}"><i class='fa fa-link'></i> <span>Share Reindex</span></a></li>
{% endif %}
{% if perms.osf.change_maintenancestate %}
<li><a href="{% url 'maintenance:display' %}"><i class='fa fa-link'></i> <span>Maintenance Alerts</span></a></li>
{% endif %}
Expand Down
133 changes: 133 additions & 0 deletions admin/templates/share_reindex/list.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
{% extends "base.html" %}
{% load render_bundle from webpack_loader %}
{% load comment_extras %}

{% load static %}
{% block top_includes %}
    <link rel="stylesheet" type="text/css" href="/static/css/institutions.css" />
{% endblock %}
{% block title %}
    <title>Share Reindex</title>
{% endblock title %}
{% block content %}
    <h2>Share Reindex</h2>

    {# Keep the selected resource type in every pagination link. #}
    {% include "util/pagination.html" with items=page extra_query_params="&type="|add:selected_resource_type %}

    <div class="row" style="margin-bottom: 20px;">
        {# Resource type filter: submits the form as soon as the selection changes. #}
        <div class="col-md-3">
            <form method="GET" action="">
                <select class="form-control" name="type" onchange="this.form.submit()">
                    <option value="projects" {% if selected_resource_type == 'projects' %}selected{% endif %}>Projects</option>
                    <option value="preprints" {% if selected_resource_type == 'preprints' %}selected{% endif %}>Preprints</option>
                    <option value="registries" {% if selected_resource_type == 'registries' %}selected{% endif %}>Registries</option>
                    <option value="users" {% if selected_resource_type == 'users' %}selected{% endif %}>Users</option>
                    <option value="files" {% if selected_resource_type == 'files' %}selected{% endif %}>Files</option>
                </select>
            </form>
        </div>

        {# Bulk reindex of the selected resource type, behind a confirmation modal. #}
        <div class="col-md-3">
            <a data-toggle="modal" data-target="#confirmReindexShareNodes" class="btn btn-primary">
                SHARE Reindex All {{ selected_resource_type }}
            </a>

            <div class="modal" id="confirmReindexShareNodes">
                <div class="modal-dialog">
                    <div class="modal-content">
                        {# Reverse through the instance namespace, like the other share_reindex links. #}
                        <form method="post" action="{% url 'share_reindex:reindex-share-resource' resource_type=selected_resource_type %}">
                            {% csrf_token %}
                            <div class="modal-header">
                                <button type="button" class="close" data-dismiss="modal">&times;</button>
                                <h3>Are you sure you want to reindex {{ selected_resource_type }} (SHARE)?</h3>
                            </div>

                            <div class="modal-footer">
                                <button type="button" class="btn btn-default" data-dismiss="modal">Cancel</button>
                                <input class="btn btn-primary" type="submit" value="Confirm Re-index" />
                            </div>
                        </form>
                    </div>
                </div>
            </div>
        </div>
    </div>

    {% if share_reindex_message %}
        <div>
            <p>{{ share_reindex_message }}</p>
        </div>
    {% endif %}

    <table class="table table-striped table-hover table-responsive">
        <thead>
            <tr>
                <th>Guid</th>
                {% if selected_resource_type == 'projects' or selected_resource_type == 'preprints' or selected_resource_type == 'registries' %}
                    <th>Title</th>
                {% elif selected_resource_type == 'users' %}
                    <th>Fullname</th>
                {% else %}
                    <th>Name</th>
                {% endif %}
                <th>Datetime Last Indexed</th>
                <!-- there is no per-file reindex option on the detail page for now -->
                {% if selected_resource_type != 'files' %}
                    <th>Reindex</th>
                {% endif %}
            </tr>
        </thead>
        <tbody>
            {% for item in items_to_index %}
                <tr>
                    <td>
                        <a href="{% url resource_detail guid=item.first_guid %}">
                            {{ item.first_guid }}
                        </a>
                    </td>
                    {% if selected_resource_type == 'projects' or selected_resource_type == 'preprints' or selected_resource_type == 'registries' %}
                        <td>{{ item.title }}</td>
                    {% elif selected_resource_type == 'users' %}
                        <td>{{ item.fullname }}</td>
                    {% else %}
                        <td>{{ item.name }}</td>
                    {% endif %}

                    <td>{{ item.date_last_indexed }}</td>

                    {% if selected_resource_type != 'files' %}
                        <td>
                            <a data-toggle="modal" data-target="#confirmReindexShareNode-{{ item.first_guid }}" class="btn btn-primary">SHARE Reindex</a>

                            {# The modal stays inside the cell: a <div> is not a valid direct child of <tr>. #}
                            <div class="modal" id="confirmReindexShareNode-{{ item.first_guid }}">
                                <div class="modal-dialog">
                                    <div class="modal-content">
                                        <form class="well" method="post" action="{% url resource_guid_reindex guid=item.first_guid %}">
                                            <div class="modal-header">
                                                <button type="button" class="close" data-dismiss="modal">&times;</button>
                                                <h3>Are you sure you want to reindex this resource (SHARE)? {{ item.first_guid }}</h3>
                                            </div>
                                            {% csrf_token %}
                                            <div class="modal-footer">
                                                <input class="btn btn-danger" type="submit" value="Confirm" />
                                                <button type="button" class="btn btn-default" data-dismiss="modal">
                                                    Cancel
                                                </button>
                                            </div>
                                        </form>
                                    </div>
                                </div>
                            </div>
                        </td>
                    {% endif %}
                </tr>
            {% endfor %}
        </tbody>
    </table>

{% endblock content %}
14 changes: 7 additions & 7 deletions admin/templates/util/pagination.html
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
<div class="pagination pagination-lg">
<span>
{% if items.has_previous %}
<a href="?page=1&amp;status={{ status }}&amp;p={{ pagin }}&amp;order_by={{ order }}"
<a href="?page=1&amp;status={{ status }}&amp;p={{ pagin }}&amp;order_by={{ order }}{{ extra_query_params }}"
class="btn btn-primary">
|
</a>
<a href="?page={{ items.previous_page_number }}&amp;status={{ status }}&amp;p={{ pagin }}&amp;order_by={{ order }}"
<a href="?page={{ items.previous_page_number }}&amp;status={{ status }}&amp;p={{ pagin }}&amp;order_by={{ order }}{{ extra_query_params }}"
class="btn btn-primary">
<i class="fa fa-angle-left"></i>
</a>
Expand All @@ -25,11 +25,11 @@
</span>

{% if items.has_next %}
<a href="?page={{ items.next_page_number }}&amp;status={{ status }}&amp;p={{ pagin }}&amp;order_by={{ order }}"
<a href="?page={{ items.next_page_number }}&amp;status={{ status }}&amp;p={{ pagin }}&amp;order_by={{ order }}{{ extra_query_params }}"
class="btn btn-primary">
<i class="fa fa-angle-right"></i>
</a>
<a href="?page={{ items.paginator.num_pages }}&amp;status={{ status }}&amp;p={{ pagin }}&amp;order_by={{ order }}"
<a href="?page={{ items.paginator.num_pages }}&amp;status={{ status }}&amp;p={{ pagin }}&amp;order_by={{ order }}{{ extra_query_params }}"
class="btn btn-primary">
|
</a>
Expand All @@ -44,11 +44,11 @@
</span>
{% if pagin %}
<span>
<a href="?p=10&amp;order_by={{ order }}&amp;status={{ status }}"
<a href="?p=10&amp;order_by={{ order }}&amp;status={{ status }}{{ extra_query_params }}"
class="btn btn-primary">10</a>
<a href="?p=25&amp;order_by={{ order }}&amp;status={{ status }}"
<a href="?p=25&amp;order_by={{ order }}&amp;status={{ status }}{{ extra_query_params }}"
class="btn btn-primary">25</a>
<a href="?p=50&amp;order_by={{ order }}&amp;status={{ status }}"
<a href="?p=50&amp;order_by={{ order }}&amp;status={{ status }}{{ extra_query_params }}"
class="btn btn-primary">50</a>
</span>
{% endif %}
Expand Down
38 changes: 38 additions & 0 deletions api/share/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@
import logging

from django.apps import apps
from django.db.models import Q, OuterRef, Subquery
from django.contrib.contenttypes.models import ContentType
from celery.utils.time import get_exponential_backoff_interval
import requests


from framework.celery_tasks import app as celery_app
from framework.celery_tasks.handlers import enqueue_task
from framework.encryption import ensure_bytes
Expand Down Expand Up @@ -80,6 +83,7 @@ def task__update_share(self, guid: str, is_backfill=False, osfmap_partition_name
raise ValueError(f'unknown osfguid "{guid}"')
_resource = _osfid_instance.referent
_is_deletion = _should_delete_indexcard(_resource)
_resource.mark_indexing_failed()
try:
_response = (
pls_delete_trove_record(_resource, osfmap_partition=_osfmap_partition)
Expand Down Expand Up @@ -115,6 +119,7 @@ def task__update_share(self, guid: str, is_backfill=False, osfmap_partition_name
if HTTPStatus(_response.status_code).is_server_error:
raise self.retry(exc=e)
else: # success response
_resource.mark_indexing_success()
if not _is_deletion:
# enqueue followup task for supplementary metadata
_next_partition = _next_osfmap_partition(_osfmap_partition)
Expand All @@ -126,6 +131,39 @@ def task__update_share(self, guid: str, is_backfill=False, osfmap_partition_name
)


@celery_app.task
def task__reindex_failed_or_not_indexed_resource_into_share(resource_type: str, start_id: int = 0, chunk_count: int = 200, chunk_size: int = 500):
    """Recatalog the not-yet-indexed resources of one resource type.

    Selects the resources with
    ``get_not_indexed_guids_for_resource_with_no_indexed_guid`` (model
    instances, ``first_guid=False``) and passes them to ``recatalog``.

    Args:
        resource_type: key naming the kind of resource to reindex.
        start_id: forwarded unchanged to ``recatalog``.
        chunk_count: forwarded unchanged to ``recatalog``.
        chunk_size: forwarded unchanged to ``recatalog``.
    """
    from osf.management.commands.recatalog_metadata import recatalog

    # NOTE: the default chunk count and chunk size are still to be agreed
    # with the Cloud Team.
    recatalog(
        get_not_indexed_guids_for_resource_with_no_indexed_guid(resource_type, first_guid=False),
        start_id,
        chunk_count,
        chunk_size,
    )


def get_not_indexed_guids_for_resource_with_no_indexed_guid(resource_type: str, first_guid: bool = True):
    """Return the resources of ``resource_type`` that are not indexed in SHARE.

    A resource counts as not indexed when ``has_been_indexed`` is ``False`` or
    ``NULL``. Deleted resources are always excluded. Projects, preprints and
    registries must also be public (preprints published as well), and users
    must be active.

    Args:
        resource_type: one of ``'projects'``, ``'preprints'``,
            ``'registries'``, ``'users'`` or ``'files'``. Any other value is
            treated as ``'projects'``.
        first_guid: when true, annotate each row with ``first_guid`` (the
            ``_id`` of the resource's earliest-created Guid), drop rows
            without a guid and return ``.values()`` dicts for display. When
            false, return the filtered queryset of model instances.

    Returns:
        A queryset of dicts (``first_guid=True``) or of model instances.
    """
    from osf.models import Guid, Registration, Preprint, Node, OSFUser
    from addons.osfstorage.models import OsfStorageFile

    not_deleted = Q(deleted__isnull=True)
    not_indexed = Q(has_been_indexed=False) | Q(has_been_indexed__isnull=True)
    public_not_indexed = Q(is_public=True) & not_deleted & not_indexed

    resource_mapper = {
        'projects': (Node, public_not_indexed, ('first_guid', 'date_last_indexed', 'title')),
        'preprints': (Preprint, public_not_indexed & Q(is_published=True), ('first_guid', 'date_last_indexed', 'title')),
        'registries': (Registration, public_not_indexed, ('first_guid', 'date_last_indexed', 'title')),
        'users': (OSFUser, Q(is_active=True) & not_deleted & not_indexed, ('first_guid', 'fullname', 'date_last_indexed')),
        # Files use the same indexed-state filter as every other type;
        # without it every non-deleted file would be listed and reindexed.
        'files': (OsfStorageFile, not_deleted & not_indexed, ('first_guid', 'name', 'date_last_indexed')),
    }
    # The fallback must be a mapper entry: a bare 'projects' string cannot be
    # unpacked into (model, query, values) and raised ValueError.
    resource_model, query, values_to_return = resource_mapper.get(resource_type, resource_mapper['projects'])

    queryset = resource_model.objects.filter(query)
    if not first_guid:
        return queryset

    model_content_type = ContentType.objects.get_for_model(resource_model)
    first_guid_sq = Guid.objects.filter(
        content_type=model_content_type,
        object_id=OuterRef('pk'),
    ).order_by('created').values('_id')[:1]
    return (
        queryset
        .annotate(first_guid=Subquery(first_guid_sq))
        .exclude(first_guid__isnull=True)
        .values(*values_to_return)
    )


def pls_send_trove_record(osf_item, *, is_backfill: bool, osfmap_partition: OsfmapPartition):
try:
_iri = osf_item.get_semantic_iri()
Expand Down
53 changes: 53 additions & 0 deletions osf/migrations/0036_abstractnode_date_last_indexed_and_more.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Generated by Django 4.2.26 on 2026-03-13 11:46

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add SHARE indexing state columns to four models.

    ``abstractnode``, ``basefilenode``, ``osfuser`` and ``preprint`` each gain
    a nullable ``date_last_indexed`` timestamp and a nullable, db-indexed
    ``has_been_indexed`` boolean whose default is ``None``.
    """

    dependencies = [
        ('osf', '0035_merge_20251215_1451'),
    ]

    operations = [
        # abstractnode
        migrations.AddField(
            model_name='abstractnode',
            name='date_last_indexed',
            field=models.DateTimeField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name='abstractnode',
            name='has_been_indexed',
            field=models.BooleanField(blank=True, db_index=True, default=None, null=True),
        ),
        # basefilenode
        migrations.AddField(
            model_name='basefilenode',
            name='date_last_indexed',
            field=models.DateTimeField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name='basefilenode',
            name='has_been_indexed',
            field=models.BooleanField(blank=True, db_index=True, default=None, null=True),
        ),
        # osfuser
        migrations.AddField(
            model_name='osfuser',
            name='date_last_indexed',
            field=models.DateTimeField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name='osfuser',
            name='has_been_indexed',
            field=models.BooleanField(blank=True, db_index=True, default=None, null=True),
        ),
        # preprint
        migrations.AddField(
            model_name='preprint',
            name='date_last_indexed',
            field=models.DateTimeField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name='preprint',
            name='has_been_indexed',
            field=models.BooleanField(blank=True, db_index=True, default=None, null=True),
        ),
    ]
4 changes: 2 additions & 2 deletions osf/models/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from framework import sentry
from .base import BaseModel, OptionalGuidMixin, ObjectIDMixin
from .comment import CommentableMixin
from .mixins import Taggable
from .mixins import Taggable, ShareIndexMixin
from .validators import validate_location
from osf.utils.datetime_aware_jsonfield import DateTimeAwareJSONField
from osf.utils.fields import NonNaiveDateTimeField
Expand Down Expand Up @@ -64,7 +64,7 @@ class UnableToResolveFileClass(Exception):
pass


class BaseFileNode(TypedModel, CommentableMixin, OptionalGuidMixin, Taggable, ObjectIDMixin, BaseModel):
class BaseFileNode(TypedModel, CommentableMixin, OptionalGuidMixin, Taggable, ObjectIDMixin, ShareIndexMixin, BaseModel):
"""Base class for all provider-specific file models and the trashed file model.
This class should generally not be used or created manually. Use the provider-specific
subclasses instead.
Expand Down
Loading
Loading