diff --git a/addons/base/views.py b/addons/base/views.py index dbb3c1a3072..89951d905f8 100644 --- a/addons/base/views.py +++ b/addons/base/views.py @@ -310,7 +310,8 @@ def decrypt_and_decode_jwt_payload(): def get_authenticated_resource(resource_id): - resource = AbstractNode.load(resource_id) or Preprint.load(resource_id) + resource, _ = Guid.load_referent(resource_id) + if not resource: raise HTTPError(http_status.HTTP_404_NOT_FOUND, message='Resource not found.') @@ -390,8 +391,7 @@ def get_auth(auth, **kwargs): waterbutler_data = decrypt_and_decode_jwt_payload() # Authenticate the resource based on the node_id and handle potential draft nodes - resource = get_authenticated_resource(waterbutler_data['nid']) - + resource = get_authenticated_resource(waterbutler_data.get('nid')) # Authenticate the user using cookie or Oauth if possible authenticate_user_if_needed(auth, waterbutler_data, resource) @@ -529,9 +529,9 @@ def create_waterbutler_log(payload, **kwargs): auth = payload['auth'] # Don't log download actions if payload['action'] in DOWNLOAD_ACTIONS: - guid = Guid.load(payload['metadata'].get('nid')) - if guid: - node = guid.referent + guid_id = payload['metadata'].get('nid') + + node, _ = Guid.load_referent(guid_id) return {'status': 'success'} user = OSFUser.load(auth['id']) @@ -890,7 +890,7 @@ def addon_view_or_download_file(auth, path, provider, **kwargs): extras.pop('_', None) # Clean up our url params a bit action = extras.get('action', 'view') guid = kwargs.get('guid') - guid_target = getattr(Guid.load(guid), 'referent', None) + guid_target, _ = Guid.load_referent(guid) target = guid_target or kwargs.get('node') or kwargs['project'] provider_safe = markupsafe.escape(provider) diff --git a/addons/osfstorage/decorators.py b/addons/osfstorage/decorators.py index a45fe392a3b..2f27d98bf5d 100644 --- a/addons/osfstorage/decorators.py +++ b/addons/osfstorage/decorators.py @@ -33,8 +33,8 @@ def load_guid_as_target(func): @functools.wraps(func) def wrapped(*args, **kwargs): - guid = kwargs.get('guid') - target = getattr(Guid.load(guid), 'referent', None) + target, _ = Guid.load_referent(kwargs.get('guid')) + if not target: raise HTTPError( http_status.HTTP_404_NOT_FOUND, @@ -88,7 +88,9 @@ def wrapped(payload, *args, **kwargs): user = OSFUser.load(payload['user']) # Waterbutler is sending back ['node'] under the destination payload - WB should change to target target = payload['destination'].get('target') or payload['destination'].get('node') - dest_target = Guid.load(target).referent + dest_target, _ = Guid.load_referent(target) + if not dest_target: + raise HTTPError(http_status.HTTP_404_NOT_FOUND) source = OsfStorageFileNode.get(payload['source'], kwargs['target']) dest_parent = OsfStorageFolder.get(payload['destination']['parent'], dest_target) diff --git a/addons/osfstorage/listeners.py b/addons/osfstorage/listeners.py index ba56f46b254..5b4de43aa16 100644 --- a/addons/osfstorage/listeners.py +++ b/addons/osfstorage/listeners.py @@ -24,7 +24,7 @@ def checkin_files_task(node_id, user_id): Guid = apps.get_model('osf.Guid') OSFUser = apps.get_model('osf.OSFUser') - node = Guid.load(node_id).referent + node, _ = Guid.load_referent(node_id) assert isinstance(node, (AbstractNode, Preprint)) user = OSFUser.load(user_id) diff --git a/addons/osfstorage/utils.py b/addons/osfstorage/utils.py index 6b69681f526..c2331d50b17 100644 --- a/addons/osfstorage/utils.py +++ b/addons/osfstorage/utils.py @@ -9,7 +9,7 @@ from framework.celery_tasks import app from framework.postcommit_tasks.handlers import enqueue_postcommit_task from framework.sessions import get_session -from osf.models import BaseFileNode, Guid +from osf.models import BaseFileNode, Guid, Preprint from addons.osfstorage import settings @@ -21,7 +21,7 @@ def enqueue_update_analytics(node, file, version_idx, action='download'): @app.task(max_retries=5, default_retry_delay=60) def update_analytics_async(node_id, file_id, version_idx, session_key=None, action='download'): - node = Guid.load(node_id).referent + node, _ = Guid.load_referent(node_id) file = BaseFileNode.load(file_id) update_analytics(node, file, version_idx, session_key, action) @@ -43,7 +43,7 @@ def update_analytics(node, file, version_idx, session_key, action='download'): node_info = { 'contributors': contributors } - resource = node.guids.first() + resource = node.get_guid() if isinstance(node, Preprint) else node.guids.first() update_counter(resource, file, version=None, action=action, node_info=node_info, session_key=session_key) update_counter(resource, file, version_idx, action, node_info=node_info, session_key=session_key) diff --git a/addons/osfstorage/views.py b/addons/osfstorage/views.py index ed9516fe839..a448f3c6edd 100644 --- a/addons/osfstorage/views.py +++ b/addons/osfstorage/views.py @@ -187,9 +187,12 @@ def osfstorage_get_metadata(file_node, **kwargs): @decorators.autoload_filenode(must_be='folder') def osfstorage_get_children(file_node, **kwargs): from django.contrib.contenttypes.models import ContentType + from osf.models.preprint import Preprint user_id = request.args.get('user_id') user_content_type_id = ContentType.objects.get_for_model(OSFUser).id user_pk = OSFUser.objects.filter(guids___id=user_id, guids___id__isnull=False).values_list('pk', flat=True).first() + guid_id = file_node.target.get_guid().id if isinstance(file_node.target, Preprint) else file_node.target.guids.first().id, + with connection.cursor() as cursor: # Read the documentation on FileVersion's fields before reading this code cursor.execute(""" @@ -281,7 +284,7 @@ def osfstorage_get_children(file_node, **kwargs): AND (NOT F.type IN ('osf.trashedfilenode', 'osf.trashedfile', 'osf.trashedfolder')) """, [ user_content_type_id, - file_node.target.guids.first().id, + guid_id, user_pk, user_pk, user_id, diff --git a/admin/preprints/urls.py b/admin/preprints/urls.py index cec79891134..4cb30c1b787 100644 --- a/admin/preprints/urls.py +++ b/admin/preprints/urls.py @@ -9,24 +9,24 @@ re_path(r'^known_spam$', views.PreprintKnownSpamList.as_view(), name='known-spam'), re_path(r'^known_ham$', views.PreprintKnownHamList.as_view(), name='known-ham'), re_path(r'^withdrawal_requests$', views.PreprintWithdrawalRequestList.as_view(), name='withdrawal-requests'), - re_path(r'^(?P[a-z0-9]+)/$', views.PreprintView.as_view(), name='preprint'), - re_path(r'^(?P[a-z0-9]+)/change_provider/$', views.PreprintProviderChangeView.as_view(), name='preprint-provider'), - re_path(r'^(?P[a-z0-9]+)/machine_state/$', views.PreprintMachineStateView.as_view(), name='preprint-machine-state'), - re_path(r'^(?P[a-z0-9]+)/reindex_share_preprint/$', views.PreprintReindexShare.as_view(), + re_path(r'^(?P[A-Za-z0-9_]+)/$', views.PreprintView.as_view(), name='preprint'), + re_path(r'^(?P[A-Za-z0-9_]+)/change_provider/$', views.PreprintProviderChangeView.as_view(), name='preprint-provider'), + re_path(r'^(?P[A-Za-z0-9_]+)/machine_state/$', views.PreprintMachineStateView.as_view(), name='preprint-machine-state'), + re_path(r'^(?P[A-Za-z0-9_]+)/reindex_share_preprint/$', views.PreprintReindexShare.as_view(), name='reindex-share-preprint'), - re_path(r'^(?P[a-z0-9]+)/remove_user/(?P[a-z0-9]+)/$', views.PreprintRemoveContributorView.as_view(), + re_path(r'^(?P[A-Za-z0-9_]+)/remove_user/(?P[a-z0-9]+)/$', views.PreprintRemoveContributorView.as_view(), name='remove-user'), - re_path(r'^(?P[a-z0-9]+)/make_private/$', views.PreprintMakePrivate.as_view(), name='make-private'), - re_path(r'^(?P[a-z0-9]+)/make_public/$', views.PreprintMakePublic.as_view(), name='make-public'), - re_path(r'^(?P[a-z0-9]+)/remove/$', views.PreprintDeleteView.as_view(), name='remove'), - re_path(r'^(?P[a-z0-9]+)/restore/$', views.PreprintDeleteView.as_view(), name='restore'), - re_path(r'^(?P[a-z0-9]+)/confirm_unflag/$', views.PreprintConfirmUnflagView.as_view(), name='confirm-unflag'), - re_path(r'^(?P[a-z0-9]+)/confirm_spam/$', views.PreprintConfirmSpamView.as_view(), name='confirm-spam'), - re_path(r'^(?P[a-z0-9]+)/confirm_ham/$', views.PreprintConfirmHamView.as_view(), name='confirm-ham'), - re_path(r'^(?P[a-z0-9]+)/reindex_elastic_preprint/$', views.PreprintReindexElastic.as_view(), + re_path(r'^(?P[A-Za-z0-9_]+)/make_private/$', views.PreprintMakePrivate.as_view(), name='make-private'), + re_path(r'^(?P[A-Za-z0-9_]+)/make_public/$', views.PreprintMakePublic.as_view(), name='make-public'), + re_path(r'^(?P[A-Za-z0-9_]+)/remove/$', views.PreprintDeleteView.as_view(), name='remove'), + re_path(r'^(?P[A-Za-z0-9_]+)/restore/$', views.PreprintDeleteView.as_view(), name='restore'), + re_path(r'^(?P[A-Za-z0-9_]+)/confirm_unflag/$', views.PreprintConfirmUnflagView.as_view(), name='confirm-unflag'), + re_path(r'^(?P[A-Za-z0-9_]+)/confirm_spam/$', views.PreprintConfirmSpamView.as_view(), name='confirm-spam'), + re_path(r'^(?P[A-Za-z0-9_]+)/confirm_ham/$', views.PreprintConfirmHamView.as_view(), name='confirm-ham'), + re_path(r'^(?P[A-Za-z0-9_]+)/reindex_elastic_preprint/$', views.PreprintReindexElastic.as_view(), name='reindex-elastic-preprint'), - re_path(r'^(?P[a-z0-9]+)/approve_withdrawal/$', views.PreprintApproveWithdrawalRequest.as_view(), + re_path(r'^(?P[A-Za-z0-9_]+)/approve_withdrawal/$', views.PreprintApproveWithdrawalRequest.as_view(), name='approve-withdrawal'), - re_path(r'^(?P[a-z0-9]+)/reject_withdrawal/$', views.PreprintRejectWithdrawalRequest.as_view(), + re_path(r'^(?P[A-Za-z0-9_]+)/reject_withdrawal/$', views.PreprintRejectWithdrawalRequest.as_view(), name='reject-withdrawal'), ] diff --git a/admin/preprints/views.py b/admin/preprints/views.py index 80f6da1f059..d70630963dd 100644 --- a/admin/preprints/views.py +++ b/admin/preprints/views.py @@ -48,7 +48,7 @@ class PreprintMixin(PermissionRequiredMixin): def get_object(self): - preprint = Preprint.objects.get(guids___id=self.kwargs['guid']) + preprint = Preprint.load(self.kwargs['guid']) # Django template does not like attributes with underscores for some reason preprint.guid = preprint._id return preprint @@ -324,9 +324,10 @@ class WithdrawalRequestMixin(PermissionRequiredMixin): permission_required = 'osf.change_preprintrequest' def get_object(self): + target = Preprint.load(self.kwargs['guid']) return PreprintRequest.objects.filter( request_type='withdrawal', - target__guids___id=self.kwargs['guid'], + target=target, ).first() def get_success_url(self): diff --git a/api/actions/views.py b/api/actions/views.py index 5436d7aaea1..9e1d6d76ebb 100644 --- a/api/actions/views.py +++ b/api/actions/views.py @@ -6,7 +6,7 @@ from api.actions.permissions import ReviewActionPermission from api.actions.serializers import NodeRequestActionSerializer, ReviewActionSerializer, PreprintRequestActionSerializer from api.base.exceptions import Conflict -from api.base.filters import ListFilterMixin +from api.base.filters import TargetFilterMixin from api.base.views import JSONAPIBaseView from api.base.parsers import ( JSONAPIMultipleRelationshipsParser, @@ -110,7 +110,7 @@ def get_object(self): return action -class ReviewActionListCreate(JSONAPIBaseView, generics.ListCreateAPIView, ListFilterMixin): +class ReviewActionListCreate(JSONAPIBaseView, generics.ListCreateAPIView, TargetFilterMixin): """List of review actions viewable by this user Actions represent state changes and/or comments on a reviewable object (e.g. a preprint) diff --git a/api/base/filters.py b/api/base/filters.py index 2032ed80583..4074ed0374c 100644 --- a/api/base/filters.py +++ b/api/base/filters.py @@ -19,8 +19,9 @@ from rest_framework import serializers as ser from rest_framework.filters import OrderingFilter from osf.models import Subject, Preprint -from osf.models.base import GuidMixin +from osf.models.base import GuidMixin, Guid from functools import cmp_to_key +from framework import sentry def lowercase(lower): if hasattr(lower, '__call__'): @@ -568,24 +569,102 @@ def get_serializer_method(self, field_name): class PreprintFilterMixin(ListFilterMixin): - """View mixin that uses ListFilterMixin, adding postprocessing for preprint querying + """View mixin for many preprint listing views. It inherits from ListFilterMixin and customize postprocessing for + preprint querying by provider, subjects and versioned `_id`. - Subclasses must define `get_default_queryset()`. + Note: Subclasses must define `get_default_queryset()`. """ + + @staticmethod + def postprocess_versioned_guid_id_query_param(operation): + """Handle query parameters when filtering on `_id` for preprint which are now versioned. + + Note: preprint achieves versioning by using versioned guid, and thus no long has guid or guid._id for every + version. Must convert `guid___id__in=` look-up to `pk__in` look-up. + """ + preprint_pk_list = [] + for val in operation['value']: + referent, version = Guid.load_referent(val) + if referent is None: + continue + preprint_pk_list.append(referent.id) + # Override the operation to filter `id__in=preprint_pk_list` + operation['source_field_name'] = 'id__in' + operation['value'] = preprint_pk_list + operation['op'] = 'eq' + def postprocess_query_param(self, key, field_name, operation): if field_name == 'provider': operation['source_field_name'] = 'provider___id' if field_name == 'id': - operation['source_field_name'] = 'guids___id' + PreprintFilterMixin.postprocess_versioned_guid_id_query_param(operation) if field_name == 'subjects': self.postprocess_subject_query_param(operation) - def preprints_queryset(self, base_queryset, auth_user, allow_contribs=True, public_only=False): - return Preprint.objects.can_view( + def preprints_queryset(self, base_queryset, auth_user, allow_contribs=True, public_only=False, latest_only=False): + preprints = Preprint.objects.can_view( base_queryset=base_queryset, user=auth_user, allow_contribs=allow_contribs, public_only=public_only, ) + if latest_only: + preprints = preprints.filter(guids__isnull=False) + return preprints + + +class TargetFilterMixin(ListFilterMixin): + """View mixin for multi-content-type list views (e.g. `ReviewActionListCreate`). It inherits from `ListFilterMixin` + and customizes the postprocessing of the `target` field when target is a preprint. + + Note: Subclasses must define `get_default_queryset()`. + """ + + @staticmethod + def postprocess_preprint_as_target_query_param(operation, target_pk): + """When target is a preprint, which must be versioned, the traditional non-versioned `guid___id==_id` + look-up no longer works. Must convert it to PK look-up `target__id==pk`. + """ + # Override the operation to filter `target__id==pk` + operation['source_field_name'] = 'target__id' + operation['value'] = target_pk + operation['op'] = 'eq' + + def postprocess_query_param(self, key, field_name, operation): + """Handles a special case when filtering on `target` when `target` is a Preprint. + """ + if field_name == 'target': + referent, version = Guid.load_referent(operation['value']) + if referent: + if version: + TargetFilterMixin.postprocess_preprint_as_target_query_param(operation, referent.id) + else: + super().postprocess_query_param(key, field_name, operation) + else: + sentry.log_message(f'Target object invalid or not found: [target={operation['value']}]') + return + else: + super().postprocess_query_param(key, field_name, operation) + + +class PreprintAsTargetFilterMixin(TargetFilterMixin): + """View mixin for preprint related list views (e.g. `PreprintProviderWithdrawRequestList` and `PreprintActionList`). + It inherits from `TargetFilterMixin` and customizes postprocessing the `target` field for preprint. + + Note: Subclasses must define `get_default_queryset()`. + """ + + def postprocess_query_param(self, key, field_name, operation): + """Handles a special case when filtering on `target`. + """ + if field_name == 'target': + referent, version = Guid.load_referent(operation['value']) + # A valid preprint must have referent and version + if not referent or not version: + sentry.log_message(f'Preprint invalid or note found: [target={operation['value']}]') + return + TargetFilterMixin.postprocess_preprint_as_target_query_param(operation, referent.id) + else: + super().postprocess_query_param(key, field_name, operation) diff --git a/api/base/pagination.py b/api/base/pagination.py index 676f0baa8fb..a831673937c 100644 --- a/api/base/pagination.py +++ b/api/base/pagination.py @@ -238,8 +238,7 @@ def get_paginated_response(self, data): class PreprintContributorPagination(NodeContributorPagination): def get_resource(self, kwargs): - resource_id = kwargs.get('preprint_id') - return Preprint.load(resource_id) + return Preprint.load(kwargs.get('preprint_id')) class DraftRegistrationContributorPagination(NodeContributorPagination): diff --git a/api/base/utils.py b/api/base/utils.py index 1da52026d7e..7bbfa207fa0 100644 --- a/api/base/utils.py +++ b/api/base/utils.py @@ -16,7 +16,7 @@ from framework.auth import Auth from framework.auth.cas import CasResponse from framework.auth.oauth_scopes import ComposedScopes, normalize_scopes -from osf.models.base import GuidMixin +from osf.models.base import GuidMixin, VersionedGuidMixin from osf.utils.requests import check_select_for_update from website import settings as website_settings from website import util as website_util # noqa @@ -103,7 +103,13 @@ def get_object_or_error(model_or_qs, query_or_pk=None, request=None, display_nam raise NotFound elif isinstance(query_or_pk, str): - # they passed a 5-char guid as a string + # If the class is a subclass of `VersionedGuidMixin`, get obj directly from model using `.load()`. The naming + # for `query_or_pk` no longer matches the actual case. It is neither a query nor a pk, but a guid str. + if issubclass(model_cls, VersionedGuidMixin): + obj = model_cls.load(query_or_pk, select_for_update=select_for_update) + # If the class is a subclass of `GuidMixin` (except for `VersionedGuidMixin`), turn it into a query dictionary. + # The naming for `query_or_pk` no longer matches the actual case either. It is neither a query nor a pk, but a + # 5-char guid str. We should be able to use the `.load()` the same way as in the `VersionedGuidMixin` case. if issubclass(model_cls, GuidMixin): # if it's a subclass of GuidMixin we know it's primary_identifier_name query = {'guids___id': query_or_pk} diff --git a/api/chronos/urls.py b/api/chronos/urls.py index 58d41478657..b5fb4c93907 100644 --- a/api/chronos/urls.py +++ b/api/chronos/urls.py @@ -7,6 +7,6 @@ urlpatterns = [ re_path(r'^journals/$', views.ChronosJournalList.as_view(), name=views.ChronosJournalList.view_name), re_path(r'^journals/(?P[-0-9A-Za-z]+)/$', views.ChronosJournalDetail.as_view(), name=views.ChronosJournalDetail.view_name), - re_path(r'^(?P\w+)/submissions/$', views.ChronosSubmissionList.as_view(), name=views.ChronosSubmissionList.view_name), - re_path(r'^(?P\w+)/submissions/(?P[-0-9A-Za-z]+)/$', views.ChronosSubmissionDetail.as_view(), name=views.ChronosSubmissionDetail.view_name), + re_path(r'^(?P[A-Za-z0-9_]+)/submissions/$', views.ChronosSubmissionList.as_view(), name=views.ChronosSubmissionList.view_name), + re_path(r'^(?P[A-Za-z0-9_]+)/submissions/(?P[-0-9A-Za-z]+)/$', views.ChronosSubmissionDetail.as_view(), name=views.ChronosSubmissionDetail.view_name), ] diff --git a/api/chronos/views.py b/api/chronos/views.py index 2522f57d0bb..d304e0e6dfa 100644 --- a/api/chronos/views.py +++ b/api/chronos/views.py @@ -91,8 +91,9 @@ def get_serializer_class(self): def get_default_queryset(self): user = get_user_auth(self.request).user - preprint_contributors = Preprint.load(self.kwargs['preprint_id'])._contributors - queryset = ChronosSubmission.objects.filter(preprint__guids___id=self.kwargs['preprint_id']) + preprint = Preprint.load(self.kwargs['preprint_id']) + preprint_contributors = preprint._contributors + queryset = ChronosSubmission.objects.filter(preprint=preprint) # Get the list of stale submissions and queue a task to update them update_list_id = queryset.filter( diff --git a/api/collections/views.py b/api/collections/views.py index 7b25d014862..53d29fb9113 100644 --- a/api/collections/views.py +++ b/api/collections/views.py @@ -71,14 +71,17 @@ def get_collection(self, check_object_permissions=True): self.check_object_permissions(self.request, collection) return collection - def collection_preprints(self, collection, user): - return Preprint.objects.can_view( + def collection_preprints(self, collection, user, latest_only=False): + preprints = Preprint.objects.can_view( Preprint.objects.filter( guids__in=collection.active_guids, deleted__isnull=True, ), user=user, ) + if latest_only: + preprints = preprints.filter(guids__isnull=False) + return preprints def get_collection_submission(self, check_object_permissions=True): collection_submission = get_object_or_error( @@ -685,7 +688,7 @@ class LinkedPreprintsList(BaseLinkedList, CollectionMixin): def get_queryset(self): auth = get_user_auth(self.request) - return self.collection_preprints(self.get_collection(), auth.user) + return self.collection_preprints(self.get_collection(), auth.user, latest_only=True) # overrides APIView def get_parser_context(self, http_request): diff --git a/api/guids/views.py b/api/guids/views.py index 8cd6779ba62..de9171a0451 100644 --- a/api/guids/views.py +++ b/api/guids/views.py @@ -49,9 +49,10 @@ def get_serializer_class(self): return None def get_object(self): + base_guid_str, _ = Guid.split_guid(self.kwargs['guids']) return get_object_or_error( Guid, - self.kwargs['guids'], + base_guid_str, self.request, display_name='guid', ) diff --git a/api/nodes/views.py b/api/nodes/views.py index 93879e2d40a..2b616716bd8 100644 --- a/api/nodes/views.py +++ b/api/nodes/views.py @@ -2228,7 +2228,7 @@ def get_default_queryset(self): node = self.get_node() # Permissions on the node are handled by the permissions_classes # Permissions on the list objects are handled by the query - return self.preprints_queryset(node.preprints.all(), auth_user) + return self.preprints_queryset(node.preprints.all(), auth_user, latest_only=True) def get_queryset(self): return self.get_queryset_from_request() diff --git a/api/preprints/serializers.py b/api/preprints/serializers.py index b8ad259aa3e..5199fb78848 100644 --- a/api/preprints/serializers.py +++ b/api/preprints/serializers.py @@ -2,7 +2,8 @@ from rest_framework import exceptions from rest_framework import serializers as ser from rest_framework.fields import empty -from rest_framework.exceptions import ValidationError as DRFValidationError +from rest_framework.exceptions import NotFound, PermissionDenied, ValidationError as DRFValidationError + from website import settings from api.base.exceptions import Conflict, JSONAPIException @@ -38,7 +39,7 @@ from api.base.metrics import MetricsSerializerMixin from api.institutions.utils import update_institutions_if_user_associated from api.taxonomies.serializers import TaxonomizableSerializerMixin -from framework.exceptions import PermissionsError +from framework.exceptions import PermissionsError, UnpublishedPendingPreprintVersionExists from website.project import signals as project_signals from osf.exceptions import NodeStateError, PreprintStateError from osf.models import ( @@ -142,6 +143,8 @@ class PreprintSerializer(TaxonomizableSerializerMixin, MetricsSerializerMixin, J ) reviews_state = ser.CharField(source='machine_state', read_only=True, max_length=15) date_last_transitioned = NoneIfWithdrawal(VersionedDateTimeField(read_only=True)) + version = ser.IntegerField(read_only=True) + is_latest_version = ser.BooleanField(read_only=True) citation = NoneIfWithdrawal( RelationshipField( @@ -221,6 +224,7 @@ class PreprintSerializer(TaxonomizableSerializerMixin, MetricsSerializerMixin, J 'self': 'get_preprint_url', 'html': 'get_absolute_html_url', 'doi': 'get_article_doi_url', + 'preprint_versions': 'get_preprint_versions', 'preprint_doi': 'get_preprint_doi_url', }, ) @@ -253,11 +257,21 @@ def subjects_self_view(self): # Overrides TaxonomizableSerializerMixin return 'preprints:preprint-relationships-subjects' + def get_preprint_versions(self, obj): + return absolute_reverse( + 'preprints:preprint-versions', + kwargs={ + 'preprint_id': obj._id, + 'version': self.context['request'].parser_context['kwargs']['version'], + }, + ) + def get_preprint_url(self, obj): return absolute_reverse( - 'preprints:preprint-detail', kwargs={ - 'preprint_id': obj._id, 'version': - self.context['request'].parser_context['kwargs']['version'], + 'preprints:preprint-detail', + kwargs={ + 'preprint_id': obj._id, + 'version': self.context['request'].parser_context['kwargs']['version'], }, ) @@ -282,7 +296,7 @@ def get_preprint_doi_url(self, obj): doi = client.build_doi(preprint=obj) if client else None return f'https://doi.org/{doi}' if doi else None - def update(self, preprint, validated_data): + def update(self, preprint, validated_data, skip_share=False): assert isinstance(preprint, Preprint), 'You must specify a valid preprint to be updated' auth = get_user_auth(self.context['request']) @@ -461,7 +475,7 @@ def require_admin_permission(): if 'subjects' in validated_data: subjects = validated_data.pop('subjects', None) - self.update_subjects(preprint, subjects, auth) + self.update_subjects(preprint, subjects, auth, skip_share=skip_share) save_preprint = True if 'title' in validated_data: @@ -549,6 +563,7 @@ class Meta: class PreprintCreateSerializer(PreprintSerializer): # Overrides PreprintSerializer to make id nullable, adds `create` + # TODO: add better Docstrings id = IDField(source='_id', required=False, allow_null=True) def create(self, validated_data): @@ -559,12 +574,35 @@ def create(self, validated_data): title = validated_data.pop('title') description = validated_data.pop('description', '') - preprint = Preprint(provider=provider, title=title, creator=creator, description=description) - preprint.save() + preprint = Preprint.create(provider=provider, title=title, creator=creator, description=description) return self.update(preprint, validated_data) +class PreprintCreateVersionSerializer(PreprintSerializer): + # Overrides PreprintSerializer to make title nullable and customize version creation + # TODO: add better Docstrings + id = IDField(source='_id', required=False, allow_null=True) + title = ser.CharField(required=False) + create_from_guid = ser.CharField(required=True, write_only=True) + + def create(self, validated_data): + create_from_guid = validated_data.pop('create_from_guid', None) + auth = get_user_auth(self.context['request']) + try: + preprint, data_to_update = Preprint.create_version(create_from_guid, auth) + except PermissionsError: + raise PermissionDenied(detail='User must have ADMIN permission to create a new preprint version.') + except UnpublishedPendingPreprintVersionExists: + raise Conflict(detail='Failed to create a new preprint version due to unpublished pending version exists.') + if not preprint: + raise NotFound(detail='Failed to create a new preprint version due to source preprint not found.') + if data_to_update: + # Share should not be updated during version creation + return self.update(preprint, data_to_update, skip_share=True) + return preprint + + class PreprintCitationSerializer(NodeCitationSerializer): class Meta: type_ = 'preprint-citation' diff --git a/api/preprints/urls.py b/api/preprints/urls.py index 5971d61c7e5..ee3d6a356eb 100644 --- a/api/preprints/urls.py +++ b/api/preprints/urls.py @@ -6,20 +6,21 @@ urlpatterns = [ re_path(r'^$', views.PreprintList.as_view(), name=views.PreprintList.view_name), - re_path(r'^(?P\w+)/$', views.PreprintDetail.as_view(), name=views.PreprintDetail.view_name), - re_path(r'^(?P\w+)/bibliographic_contributors/$', views.PreprintBibliographicContributorsList.as_view(), name=views.PreprintBibliographicContributorsList.view_name), - re_path(r'^(?P\w+)/citation/$', views.PreprintCitationDetail.as_view(), name=views.PreprintCitationDetail.view_name), - re_path(r'^(?P\w+)/citation/(?P[-\w]+)/$', views.PreprintCitationStyleDetail.as_view(), name=views.PreprintCitationStyleDetail.view_name), - re_path(r'^(?P\w+)/contributors/$', views.PreprintContributorsList.as_view(), name=views.PreprintContributorsList.view_name), - re_path(r'^(?P\w+)/contributors/(?P\w+)/$', views.PreprintContributorDetail.as_view(), name=views.PreprintContributorDetail.view_name), - re_path(r'^(?P\w+)/files/$', views.PreprintStorageProvidersList.as_view(), name=views.PreprintStorageProvidersList.view_name), - re_path(r'^(?P\w+)/files/osfstorage/$', views.PreprintFilesList.as_view(), name=views.PreprintFilesList.view_name), - re_path(r'^(?P\w+)/identifiers/$', views.PreprintIdentifierList.as_view(), name=views.PreprintIdentifierList.view_name), - re_path(r'^(?P\w+)/relationships/node/$', views.PreprintNodeRelationship.as_view(), name=views.PreprintNodeRelationship.view_name), - re_path(r'^(?P\w+)/relationships/subjects/$', views.PreprintSubjectsRelationship.as_view(), name=views.PreprintSubjectsRelationship.view_name), - re_path(r'^(?P\w+)/review_actions/$', views.PreprintActionList.as_view(), name=views.PreprintActionList.view_name), - re_path(r'^(?P\w+)/requests/$', views.PreprintRequestListCreate.as_view(), name=views.PreprintRequestListCreate.view_name), - re_path(r'^(?P\w+)/subjects/$', views.PreprintSubjectsList.as_view(), name=views.PreprintSubjectsList.view_name), - re_path(r'^(?P\w+)/institutions/$', views.PreprintInstitutionsList.as_view(), name=views.PreprintInstitutionsList.view_name), - re_path(r'^(?P\w+)/relationships/institutions/$', views.PreprintInstitutionsRelationship.as_view(), name=views.PreprintInstitutionsRelationship.view_name), + re_path(r'^(?P[A-Za-z0-9_]+)/$', views.PreprintDetail.as_view(), name=views.PreprintDetail.view_name), + re_path(r'^(?P[A-Za-z0-9_]+)/bibliographic_contributors/$', views.PreprintBibliographicContributorsList.as_view(), name=views.PreprintBibliographicContributorsList.view_name), + re_path(r'^(?P[A-Za-z0-9_]+)/citation/$', views.PreprintCitationDetail.as_view(), name=views.PreprintCitationDetail.view_name), + re_path(r'^(?P[A-Za-z0-9_]+)/citation/(?P[-\w]+)/$', views.PreprintCitationStyleDetail.as_view(), name=views.PreprintCitationStyleDetail.view_name), + re_path(r'^(?P[A-Za-z0-9_]+)/contributors/$', views.PreprintContributorsList.as_view(), name=views.PreprintContributorsList.view_name), + re_path(r'^(?P[A-Za-z0-9_]+)/contributors/(?P\w+)/$', views.PreprintContributorDetail.as_view(), name=views.PreprintContributorDetail.view_name), + re_path(r'^(?P[A-Za-z0-9_]+)/files/$', views.PreprintStorageProvidersList.as_view(), name=views.PreprintStorageProvidersList.view_name), + re_path(r'^(?P[A-Za-z0-9_]+)/files/osfstorage/$', views.PreprintFilesList.as_view(), name=views.PreprintFilesList.view_name), + re_path(r'^(?P[A-Za-z0-9_]+)/identifiers/$', views.PreprintIdentifierList.as_view(), name=views.PreprintIdentifierList.view_name), + re_path(r'^(?P[A-Za-z0-9_]+)/institutions/$', views.PreprintInstitutionsList.as_view(), name=views.PreprintInstitutionsList.view_name), + re_path(r'^(?P[A-Za-z0-9_]+)/relationships/institutions/$', views.PreprintInstitutionsRelationship.as_view(), name=views.PreprintInstitutionsRelationship.view_name), + re_path(r'^(?P[A-Za-z0-9_]+)/relationships/node/$', views.PreprintNodeRelationship.as_view(), name=views.PreprintNodeRelationship.view_name), + re_path(r'^(?P[A-Za-z0-9_]+)/relationships/subjects/$', views.PreprintSubjectsRelationship.as_view(), name=views.PreprintSubjectsRelationship.view_name), + re_path(r'^(?P[A-Za-z0-9_]+)/review_actions/$', views.PreprintActionList.as_view(), name=views.PreprintActionList.view_name), + re_path(r'^(?P[A-Za-z0-9_]+)/requests/$', views.PreprintRequestListCreate.as_view(), name=views.PreprintRequestListCreate.view_name), + re_path(r'^(?P[A-Za-z0-9_]+)/subjects/$', views.PreprintSubjectsList.as_view(), name=views.PreprintSubjectsList.view_name), + re_path(r'^(?P[A-Za-z0-9_]+)/versions/$', views.PreprintVersionsList.as_view(), name=views.PreprintVersionsList.view_name), ] diff --git a/api/preprints/views.py b/api/preprints/views.py index 4c3c52f936f..e83a8b8e412 100644 --- a/api/preprints/views.py +++ b/api/preprints/views.py @@ -5,12 +5,14 @@ from rest_framework.exceptions import MethodNotAllowed, NotFound, PermissionDenied, NotAuthenticated from rest_framework import permissions as drf_permissions +from framework import sentry from framework.auth.oauth_scopes import CoreScopes from osf.models import ( - ReviewAction, + Institution, Preprint, PreprintContributor, - Institution, + ReviewAction, + VersionedGuidMixin, ) from osf.utils.requests import check_select_for_update @@ -20,7 +22,7 @@ from api.base.pagination import PreprintContributorPagination from api.base.exceptions import Conflict from api.base.views import JSONAPIBaseView, WaterButlerMixin -from api.base.filters import ListFilterMixin, PreprintFilterMixin +from api.base.filters import ListFilterMixin, PreprintAsTargetFilterMixin, PreprintFilterMixin from api.base.parsers import ( JSONAPIMultipleRelationshipsParser, JSONAPIMultipleRelationshipsParserForRegularJSON, @@ -35,6 +37,7 @@ from api.preprints.serializers import ( PreprintSerializer, PreprintCreateSerializer, + PreprintCreateVersionSerializer, PreprintCitationSerializer, PreprintContributorDetailSerializer, PreprintContributorsSerializer, @@ -58,6 +61,7 @@ PreprintFilesPermissions, PreprintInstitutionPermissionList, ) +from api.providers.workflows import Workflows from api.nodes.permissions import ContributorOrPublic from api.base.permissions import WriteOrPublicForRelationshipInstitutions from api.requests.permissions import PreprintRequestPermission @@ -68,20 +72,60 @@ from osf.metrics import PreprintDownload, PreprintView +class PreprintOldVersionsImmutableMixin: + """Override method to reject modify requests for old preprint versions (except for withdrawal)""" + + @staticmethod + def is_edit_allowed(preprint): + if preprint.is_latest_version or preprint.machine_state == 'initial': + return True + if preprint.provider.reviews_workflow == Workflows.PRE_MODERATION.value: + if preprint.machine_state == 'pending': + return True + if preprint.machine_state == 'rejected' and preprint.version == VersionedGuidMixin.INITIAL_VERSION_NUMBER: + return True + return False + + def handle_request(self, request, method, *args, **kwargs): + preprint = self.get_preprint(check_object_permissions=False) + if PreprintOldVersionsImmutableMixin.is_edit_allowed(preprint): + return method(request, *args, **kwargs) + message = f'User can not edit previous versions of a preprint: [_id={preprint._id}]' + sentry.log_message(message) + raise Conflict(detail=message) + + def update(self, request, *args, **kwargs): + return self.handle_request(request, super().update, *args, **kwargs) + + def create(self, request, *args, **kwargs): + return self.handle_request(request, super().create, *args, **kwargs) + + def destroy(self, request, *args, **kwargs): + return self.handle_request(request, super().destroy, *args, **kwargs) + + class PreprintMixin(NodeMixin): serializer_class = PreprintSerializer preprint_lookup_url_kwarg = 'preprint_id' def get_preprint(self, check_object_permissions=True, ignore_404=False): - qs = Preprint.objects.filter(guids___id=self.kwargs[self.preprint_lookup_url_kwarg], guids___id__isnull=False) - try: - preprint = qs.select_for_update().get() if check_select_for_update(self.request) else qs.select_related('node').get() - except Preprint.DoesNotExist: + preprint_lookup_data = self.kwargs[self.preprint_lookup_url_kwarg].split('_v') + + base_guid_id = preprint_lookup_data[0] + preprint_version = preprint_lookup_data[1] if len(preprint_lookup_data) > 1 else None + if preprint_version: + qs = Preprint.objects.filter(versioned_guids__guid___id=base_guid_id, versioned_guids__version=preprint_version) + else: + qs = Preprint.published_objects.filter(versioned_guids__guid___id=base_guid_id).order_by('-versioned_guids__version') + + preprint = qs.select_for_update().first() if check_select_for_update(self.request) else qs.select_related('node').first() + if not preprint: + sentry.log_message(f'Preprint not found: [guid={base_guid_id}, version={preprint_version}]') if ignore_404: return raise NotFound - if preprint.deleted is not None: + sentry.log_message(f'Preprint deleted: [guid={base_guid_id}, version={preprint_version}]') raise NotFound # May raise a permission denied @@ -156,7 +200,60 @@ def get_annotated_queryset_with_metrics(self, queryset, metric_class, metric_nam ) -class PreprintDetail(PreprintMetricsViewMixin, JSONAPIBaseView, generics.RetrieveUpdateAPIView, PreprintMixin, WaterButlerMixin): +class PreprintVersionsList(PreprintMetricsViewMixin, JSONAPIBaseView, generics.ListCreateAPIView, PreprintFilterMixin): + # These permissions are not checked for the list of preprints, permissions handled by the query + permission_classes = ( + drf_permissions.IsAuthenticatedOrReadOnly, + base_permissions.TokenHasScope, + ContributorOrPublic, + ) + + parser_classes = (JSONAPIMultipleRelationshipsParser, JSONAPIMultipleRelationshipsParserForRegularJSON) + + required_read_scopes = [CoreScopes.PREPRINTS_READ] + required_write_scopes = [CoreScopes.PREPRINTS_WRITE] + + serializer_class = PreprintSerializer + + ordering = ('-created') + ordering_fields = ('created', 'date_last_transitioned') + view_category = 'preprints' + view_name = 'preprint-versions' + metric_map = { + 'downloads': PreprintDownload, + 'views': PreprintView, + } + + def get_serializer_class(self): + if self.request.method == 'POST': + return PreprintCreateVersionSerializer + else: + return PreprintSerializer + + def get_queryset(self): + preprint = Preprint.load(self.kwargs.get('preprint_id')) + if not preprint: + sentry.log_message(f'Preprint not found: [preprint_id={self.kwargs.get('preprint_id')}]') + raise NotFound + version_ids = preprint.versioned_guids.first().guid.versions.values_list('object_id', flat=True) + qs = Preprint.objects.filter(id__in=version_ids) + + auth = get_user_auth(self.request) + auth_user = getattr(auth, 'user', None) + + # Permissions on the list objects are handled by the query + public_only = self.metrics_requested + qs = self.preprints_queryset(qs, auth_user, public_only=public_only) + + return qs + + def create(self, request, *args, **kwargs): + request.data['type'] = 'preprints' + request.data['create_from_guid'] = kwargs.get('preprint_id') + return super().create(request, *args, **kwargs) + + +class PreprintDetail(PreprintOldVersionsImmutableMixin, PreprintMetricsViewMixin, JSONAPIBaseView, generics.RetrieveUpdateAPIView, PreprintMixin, WaterButlerMixin): """The documentation for this endpoint can be found [here](https://developer.osf.io/#operation/preprints_read). """ permission_classes = ( @@ -204,7 +301,7 @@ def get_parser_context(self, http_request): return res -class PreprintNodeRelationship(JSONAPIBaseView, generics.RetrieveUpdateAPIView, PreprintMixin): +class PreprintNodeRelationship(PreprintOldVersionsImmutableMixin, JSONAPIBaseView, generics.RetrieveUpdateAPIView, PreprintMixin): permission_classes = ( drf_permissions.IsAuthenticatedOrReadOnly, base_permissions.TokenHasScope, @@ -232,7 +329,7 @@ def get_object(self): return obj -class PreprintCitationDetail(JSONAPIBaseView, generics.RetrieveAPIView, PreprintMixin): +class PreprintCitationDetail(PreprintOldVersionsImmutableMixin, JSONAPIBaseView, generics.RetrieveAPIView, PreprintMixin): """The documentation for this endpoint can be found [here](https://developer.osf.io/#operation/preprints_citation_list). """ permission_classes = ( @@ -342,7 +439,7 @@ def get_object(self, check_object_permissions=True): return self.get_preprint(check_object_permissions=check_object_permissions) -class PreprintContributorsList(NodeContributorsList, PreprintMixin): +class PreprintContributorsList(PreprintOldVersionsImmutableMixin, NodeContributorsList, PreprintMixin): permission_classes = ( AdminOrPublic, drf_permissions.IsAuthenticatedOrReadOnly, @@ -386,7 +483,7 @@ def get_serializer_context(self): return context -class PreprintContributorDetail(NodeContributorDetail, PreprintMixin): +class PreprintContributorDetail(PreprintOldVersionsImmutableMixin, NodeContributorDetail, PreprintMixin): permission_classes = ( ContributorDetailPermissions, @@ -463,7 +560,7 @@ def get_resource(self): return self.get_preprint() -class PreprintSubjectsRelationship(SubjectRelationshipBaseView, PreprintMixin): +class PreprintSubjectsRelationship(PreprintOldVersionsImmutableMixin, SubjectRelationshipBaseView, PreprintMixin): """The documentation for this endpoint can be found [here](https://developer.osf.io/#operation/preprint_subjects_list). """ permission_classes = ( @@ -493,7 +590,7 @@ def get_object(self): return obj -class PreprintActionList(JSONAPIBaseView, generics.ListCreateAPIView, ListFilterMixin, PreprintMixin): +class PreprintActionList(JSONAPIBaseView, generics.ListCreateAPIView, PreprintAsTargetFilterMixin, PreprintMixin): """Action List *Read-only* Actions represent state changes and/or comments on a reviewable object (e.g. a preprint) @@ -624,7 +721,6 @@ def get_queryset(self): def get_resource(self): return get_object_or_error(Preprint, self.kwargs['preprint_id'], self.request) - class PreprintRequestListCreate(JSONAPIBaseView, generics.ListCreateAPIView, ListFilterMixin, PreprintRequestMixin): permission_classes = ( drf_permissions.IsAuthenticatedOrReadOnly, @@ -681,7 +777,7 @@ def get_queryset(self): return self.get_resource().affiliated_institutions.all() -class PreprintInstitutionsRelationship(JSONAPIBaseView, generics.RetrieveUpdateAPIView, PreprintMixin): +class PreprintInstitutionsRelationship(PreprintOldVersionsImmutableMixin, JSONAPIBaseView, generics.RetrieveUpdateAPIView, PreprintMixin): """ """ permission_classes = ( drf_permissions.IsAuthenticatedOrReadOnly, diff --git a/api/providers/views.py b/api/providers/views.py index 4018c9c40f0..940d29f8a2f 100644 --- a/api/providers/views.py +++ b/api/providers/views.py @@ -15,7 +15,7 @@ InvalidFilterOperator, InvalidFilterValue, ) -from api.base.filters import PreprintFilterMixin, ListFilterMixin +from api.base.filters import ListFilterMixin, PreprintAsTargetFilterMixin, PreprintFilterMixin from api.base.metrics import PreprintMetricsViewMixin from api.base.pagination import MaxSizePagination, IncreasedPageSizePagination from api.base.settings import BULK_SETTINGS @@ -571,7 +571,7 @@ def perform_create(self, serializer): raise ValidationError(f'Provider {provider.name} has no primary collection to submit to.') -class PreprintProviderWithdrawRequestList(JSONAPIBaseView, generics.ListAPIView, ListFilterMixin, ProviderMixin): +class PreprintProviderWithdrawRequestList(JSONAPIBaseView, generics.ListAPIView, PreprintAsTargetFilterMixin, ProviderMixin): provider_class = PreprintProvider permission_classes = ( drf_permissions.IsAuthenticated, diff --git a/api/share/utils.py b/api/share/utils.py index 4f4137dcf58..1178adfa85d 100644 --- a/api/share/utils.py +++ b/api/share/utils.py @@ -410,11 +410,12 @@ def serialize_share_data(resource, old_subjects=None): Preprint, Registration, ) - + suid = None if isinstance(resource, Preprint): # old_subjects is only used for preprints and should be removed as soon as SHARE # is fully switched over to the non-mergy pipeline (see ENG-2098) serializer = partial(serialize_preprint, old_subjects=old_subjects) + suid = resource.get_guid()._id elif isinstance(resource, Node): serializer = serialize_osf_node elif isinstance(resource, Registration): @@ -430,7 +431,7 @@ def serialize_share_data(resource, old_subjects=None): 'attributes': { 'tasks': [], 'raw': None, - 'suid': resource._id, + 'suid': resource._id if not suid else suid, 'data': serializer(resource), }, }, diff --git a/api/subjects/serializers.py b/api/subjects/serializers.py index 59e2a1c6bfe..085659dc3cf 100644 --- a/api/subjects/serializers.py +++ b/api/subjects/serializers.py @@ -15,11 +15,12 @@ class UpdateSubjectsMixin: - def update_subjects_method(self, resource, subjects, auth): + + def update_subjects_method(self, resource, subjects, auth, skip_share=False): # Method to update subjects on resource raise NotImplementedError() - def update_subjects(self, resource, subjects, auth): + def update_subjects(self, resource, subjects, auth, skip_share=False): """Updates subjects on resource and handles errors. :param object resource: Object for which you want to update subjects @@ -27,7 +28,7 @@ def update_subjects(self, resource, subjects, auth): :param object Auth object """ try: - self.update_subjects_method(resource, subjects, auth) + self.update_subjects_method(resource, subjects, auth, skip_share=skip_share) except PermissionsError as e: raise exceptions.PermissionDenied(detail=str(e)) except ValueError as e: @@ -106,9 +107,9 @@ def make_instance_obj(self, obj): def format_subjects(self, subjects): return [subj['_id'] for subj in subjects] - def update_subjects_method(self, resource, subjects, auth): + def update_subjects_method(self, resource, subjects, auth, skip_share=False): # Overrides UpdateSubjectsMixin - return resource.set_subjects_from_relationships(subjects, auth) + return resource.set_subjects_from_relationships(subjects, auth, skip_share=skip_share) def update(self, instance, validated_data): resource = instance['self'] diff --git a/api/taxonomies/serializers.py b/api/taxonomies/serializers.py index 8fb64d76115..ab23fee10e8 100644 --- a/api/taxonomies/serializers.py +++ b/api/taxonomies/serializers.py @@ -98,7 +98,7 @@ def get_subjects(self, obj): ] # Overrides UpdateSubjectsMixin - def update_subjects_method(self, resource, subjects, auth): + def update_subjects_method(self, resource, subjects, auth, skip_share=False): """Depending on the request's version, runs a different method to update the resource's subjects. Will expect request to be formatted differently, depending on the version. @@ -108,8 +108,8 @@ def update_subjects_method(self, resource, subjects, auth): :param object Auth object """ if self.expect_subjects_as_relationships(self.context['request']): - return resource.set_subjects_from_relationships(subjects, auth) - return resource.set_subjects(subjects, auth) + return resource.set_subjects_from_relationships(subjects, auth, skip_share=skip_share) + return resource.set_subjects(subjects, auth, skip_share=skip_share) def expect_subjects_as_relationships(self, request): """Determines whether subjects should be serialized as a relationship. diff --git a/api/users/views.py b/api/users/views.py index 927b5dc2f9b..f67babc667e 100644 --- a/api/users/views.py +++ b/api/users/views.py @@ -407,7 +407,7 @@ def get_default_queryset(self): # Permissions on the list objects are handled by the query default_qs = Preprint.objects.filter(_contributors__guids___id=target_user._id).exclude(machine_state='initial') - return self.preprints_queryset(default_qs, auth_user, allow_contribs=False) + return self.preprints_queryset(default_qs, auth_user, allow_contribs=False, latest_only=True) def get_queryset(self): return self.get_queryset_from_request() @@ -792,18 +792,22 @@ def post(self, request, *args, **kwargs): record_id = (request.data.get('id', None) or '').lower().strip() if not record_id: raise ValidationError('Must specify record "id".') + claimed_user = self.get_user(check_permissions=True) # Ensures claimability if claimed_user.is_disabled: raise ValidationError('Cannot claim disabled account.') - try: - record_referent = Guid.objects.get(_id=record_id).referent - except Guid.DoesNotExist: + + record_referent, _ = Guid.load_referent(record_id) + if not record_referent: raise NotFound('Unable to find specified record.') try: unclaimed_record = claimed_user.unclaimed_records[record_referent._id] except KeyError: - if isinstance(record_referent, Preprint) and record_referent.node and record_referent.node._id in claimed_user.unclaimed_records: + if isinstance( + record_referent, + Preprint, + ) and record_referent.node and record_referent.node._id in claimed_user.unclaimed_records: record_referent = record_referent.node unclaimed_record = claimed_user.unclaimed_records[record_referent._id] else: @@ -825,9 +829,9 @@ def post(self, request, *args, **kwargs): self._send_claim_email(claimer, claimed_user, record_referent, registered=True) except HTTPError as e: raise ValidationError(e.data['message_long']) - else: raise ValidationError('Must either be logged in or specify claim email.') + return Response(status=status.HTTP_204_NO_CONTENT) diff --git a/api/wb/views.py b/api/wb/views.py index e45ee69c7c2..6dac47673ca 100644 --- a/api/wb/views.py +++ b/api/wb/views.py @@ -24,10 +24,9 @@ def get_object(self): return self.get_target(self.kwargs[self.target_lookup_url_kwarg]) def get_target(self, target_id): - guid = Guid.load(target_id) - if not guid: + target, _ = Guid.load_referent(target_id) + if not target: raise NotFound - target = guid.referent if getattr(target, 'is_registration', False) and not getattr(target, 'archiving', False): raise ValidationError('Registrations cannot be changed.') return target diff --git a/api_tests/actions/views/test_action_list.py b/api_tests/actions/views/test_action_list.py index d04b57c1a43..bda6be6890a 100644 --- a/api_tests/actions/views/test_action_list.py +++ b/api_tests/actions/views/test_action_list.py @@ -154,7 +154,6 @@ def test_bad_requests(self, app, url, preprint, provider, moderator): ('initial', 'edit_comment'), ('initial', 'reject'), ('initial', 'withdraw'), - ('pending', 'submit'), ('rejected', 'reject'), ('rejected', 'submit'), ('rejected', 'withdraw'), diff --git a/api_tests/collections/test_views.py b/api_tests/collections/test_views.py index 6692b8915de..3f7d414663f 100644 --- a/api_tests/collections/test_views.py +++ b/api_tests/collections/test_views.py @@ -16,7 +16,7 @@ AuthUserFactory, SubjectFactory, ) -from osf.models import Collection +from osf.models import Collection, VersionedGuidMixin from osf.utils.sanitize import strip_html from osf.utils.permissions import ADMIN, WRITE, READ from website.project.signals import contributor_removed @@ -3894,10 +3894,14 @@ def url_collection_linked_preprints(self, collection): @pytest.fixture() def id_linked_preprints(self, collection): - return list( - collection.guid_links.values_list( - '_id', flat=True) - ) + res = [] + for guid in collection.guid_links.all(): + if guid.is_versioned: + for through_item in guid.versions.all(): + res.append(f'{guid._id}{VersionedGuidMixin.GUID_VERSION_DELIMITER}{through_item.version}') + else: + res.append(guid._id) + return res def test_linked_preprints_returns_everything( self, app, url_collection_linked_preprints, diff --git a/api_tests/nodes/views/test_node_preprints.py b/api_tests/nodes/views/test_node_preprints.py index 4c5ace85114..75d663cd23b 100644 --- a/api_tests/nodes/views/test_node_preprints.py +++ b/api_tests/nodes/views/test_node_preprints.py @@ -155,7 +155,6 @@ def test_unpublished_visible_to_admins( preprint_published, url): res = app.get(url, auth=user_admin_contrib.auth) assert len(res.json['data']) == 2 - assert preprint_unpublished._id in [d['id'] for d in res.json['data']] assert preprint_published._id in [d['id'] for d in res.json['data']] def test_unpublished_invisible_to_write_contribs( diff --git a/api_tests/preprints/filters/test_filters.py b/api_tests/preprints/filters/test_filters.py index f52b7accea2..9a483ad55ec 100644 --- a/api_tests/preprints/filters/test_filters.py +++ b/api_tests/preprints/filters/test_filters.py @@ -141,8 +141,7 @@ def test_id_filter_null(self, app, user, id_url): actual = [preprint['id'] for preprint in res.json['data']] assert expected == actual - def test_id_filter_equals_returns_one( - self, app, user, preprint_two, id_url): + def test_id_filter_equals_returns_one(self, app, user, preprint_two, id_url): expected = [preprint_two._id] res = app.get(f'{id_url}{preprint_two._id}', auth=user.auth) actual = [preprint['id'] for preprint in res.json['data']] diff --git a/api_tests/preprints/views/test_preprint_list.py b/api_tests/preprints/views/test_preprint_list.py index 5fd8f224a01..181351eecd1 100644 --- a/api_tests/preprints/views/test_preprint_list.py +++ b/api_tests/preprints/views/test_preprint_list.py @@ -16,7 +16,7 @@ PreprintIsValidListMixin, ) from api_tests.reviews.mixins.filter_mixins import ReviewableFilterMixin -from osf.models import Preprint, Node +from osf.models import Guid, Node, Preprint from osf import features from osf.utils.workflows import DefaultStates from osf.utils import permissions @@ -123,13 +123,21 @@ def test_create_preprint_does_not_create_a_node(self, app, user_one, provider, u def test_create_preprint_with_supplementary_node( self, app, user_one, provider, url, preprint_payload, supplementary_project ): - preprint_payload['data']['relationships']['node'] = {'data': {'id': supplementary_project._id, 'type': 'nodes'}} - res = app.post_json_api(url, preprint_payload, auth=user_one.auth) + preprint_payload['data']['relationships']['node'] = { + 'data': {'id': supplementary_project._id, 'type': 'nodes'} + } + res = app.post_json_api(url, preprint_payload, auth=user_one.auth) assert res.status_code == 201 preprint = Preprint.load(res.json['data']['id']) + preprint_id, version = Guid.split_guid(res.json['data']['id']) + assert preprint.node == supplementary_project - assert Node.objects.filter(preprints__guids___id=res.json['data']['id']).exists() + filtered_nodes = Node.objects.filter( + preprints__versioned_guids__guid___id=preprint_id, + preprints__versioned_guids__version=version + ) + assert supplementary_project in filtered_nodes def test_create_preprint_with_incorrectly_specified_node( self, app, user_one, provider, url, preprint_payload, supplementary_project diff --git a/api_tests/preprints/views/test_preprint_versions.py b/api_tests/preprints/views/test_preprint_versions.py new file mode 100644 index 00000000000..d7fbe2a9170 --- /dev/null +++ b/api_tests/preprints/views/test_preprint_versions.py @@ -0,0 +1,156 @@ +from django.contrib.contenttypes.models import ContentType +from django.db.models.fields import Field + +from addons.osfstorage import settings as osfstorage_settings +from addons.osfstorage.models import OsfStorageFile +from api.base.settings.defaults import API_BASE +from framework.auth.core import Auth +from osf.models import Preprint +from osf.utils import permissions +from osf_tests.factories import ProjectFactory, PreprintFactory, AuthUserFactory +from tests.base import ApiTestCase + + +# TODO: we have good coverage for `POST`; please add new tests for `GET` +class TestPreprintVersion(ApiTestCase): + + def setUp(self): + + super().setUp() + self.user = AuthUserFactory() + self.project = ProjectFactory(creator=self.user) + self.moderator = AuthUserFactory() + self.post_mod_preprint = PreprintFactory( + reviews_workflow='post-moderation', + is_published=True, + creator=self.user + ) + self.pre_mod_preprint = PreprintFactory( + reviews_workflow='pre-moderation', + is_published=False, + creator=self.user + ) + self.post_mod_preprint.provider.get_group('moderator').user_set.add(self.moderator) + self.pre_mod_preprint.provider.get_group('moderator').user_set.add(self.moderator) + self.post_mod_version_create_url = f'/{API_BASE}preprints/{self.post_mod_preprint._id}/versions/?version=2.20' + self.pre_mode_version_create_url = f'/{API_BASE}preprints/{self.pre_mod_preprint._id}/versions/?version=2.20' + self.post_mod_preprint_update_url = f'/{API_BASE}preprints/{self.post_mod_preprint._id}/?version=2.20' + self.post_mod_preprint_update_payload = { + 'data': { + 'id': self.post_mod_preprint._id, + 'type': 'preprints', + 'attributes': { + 'title': 'new_title', + } + } + } + + def test_create_preprint_version(self): + res = self.app.post_json_api(self.post_mod_version_create_url, auth=self.user.auth) + assert res.status_code == 201 + new_version = Preprint.load(res.json['data']['id']) + assert new_version.is_published is False + assert new_version.machine_state == 'initial' + assert new_version.files.count() == 0 + + def test_non_relation_fields(self): + res = self.app.post_json_api(self.post_mod_version_create_url, auth=self.user.auth) + ignored_fields = [ + 'id', 'created', 'modified', 'last_logged', 'date_last_reported', 'reports', 'date_last_transitioned', + 'machine_state', 'is_published', 'date_published', 'preprint_doi_created', 'ever_public', 'has_coi' + ] + new_version = Preprint.load(res.json['data']['id']) + non_relation_fields = [ + field.name for field in self.post_mod_preprint._meta.get_fields() + if isinstance(field, Field) and + not field.is_relation and + field.name not in ignored_fields + ] + preprint_data = {field: getattr(self.post_mod_preprint, field) for field in non_relation_fields} + preprint_version_data = {field: getattr(new_version, field) for field in non_relation_fields} + assert preprint_data == preprint_version_data + + def test_relation_fields(self): + res = self.app.post_json_api(self.post_mod_version_create_url, auth=self.user.auth) + new_version = Preprint.load(res.json['data']['id']) + assert self.post_mod_preprint.provider == new_version.provider + assert self.post_mod_preprint.node == new_version.node + assert self.post_mod_preprint.license == new_version.license + assert self.post_mod_preprint.creator == new_version.creator + assert self.post_mod_preprint.region == new_version.region + assert list(self.post_mod_preprint.tags.values_list('name', flat=True)) == list(new_version.tags.values_list('name', flat=True)) + assert list(self.post_mod_preprint.subjects.values_list('text', flat=True)) == list(new_version.subjects.values_list('text', flat=True)) + assert list(self.post_mod_preprint.affiliated_institutions.values_list('name', flat=True)) == list(new_version.affiliated_institutions.values_list('name', flat=True)) + assert list(self.post_mod_preprint._contributors.values_list('username', flat=True)) == list(new_version._contributors.values_list('username', flat=True)) + + def test_return_409_if_unpublished_pending_version_exists(self): + res = self.app.post_json_api(self.pre_mode_version_create_url, auth=self.user.auth) + new_version = Preprint.load(res.json['data']['id']) + filename = 'preprint_file.txt' + preprint_file = OsfStorageFile.create( + target_object_id=new_version.id, + target_content_type=ContentType.objects.get_for_model(new_version), + path=f'/{filename}', + name=filename, + materialized_path=f'/{filename}' + ) + preprint_file.save() + new_version.set_primary_file(preprint_file, auth=Auth(new_version.creator), save=True) + location = {'object': '06d80e', 'service': 'cloud', osfstorage_settings.WATERBUTLER_RESOURCE: 'osf', } + metadata = {'size': 1357, 'contentType': 'img/png', } + preprint_file.create_version(self.user, location, metadata=metadata).save() + new_version.run_submit(self.moderator) + assert new_version.is_published is False + assert new_version.machine_state == 'pending' + res = self.app.post_json_api(self.pre_mode_version_create_url, auth=self.user.auth, expect_errors=True) + assert res.status_code == 409 + + def test_return_409_if_try_to_edit_old_versions(self): + res = self.app.post_json_api(self.post_mod_version_create_url, auth=self.user.auth) + new_version = Preprint.load(res.json['data']['id']) + filename = 'preprint_file.txt' + preprint_file = OsfStorageFile.create( + target_object_id=new_version.id, + target_content_type=ContentType.objects.get_for_model(new_version), + path=f'/{filename}', + name=filename, + materialized_path=f'/{filename}' + ) + preprint_file.save() + new_version.set_primary_file(preprint_file, auth=Auth(new_version.creator), save=True) + location = {'object': '06d80e', 'service': 'cloud', osfstorage_settings.WATERBUTLER_RESOURCE: 'osf', } + metadata = {'size': 1357, 'contentType': 'img/png', } + preprint_file.create_version(self.user, location, metadata=metadata).save() + new_version.run_submit(self.moderator) + assert new_version.is_published is True + new_version.run_accept(self.moderator, 'comment') + assert new_version.machine_state == 'accepted' + res = self.app.patch_json_api( + self.post_mod_preprint_update_url, + self.post_mod_preprint_update_payload, + auth=self.user.auth, + expect_errors=True + ) + assert res.status_code == 409 + + def test_reuse_version_if_unfinished_version_exists(self): + res = self.app.post_json_api(self.post_mod_version_create_url, auth=self.user.auth) + unfinished_version = Preprint.load(res.json['data']['id']) + res = self.app.post_json_api(self.post_mod_version_create_url, auth=self.user.auth, expect_errors=True) + assert res.status_code == 201 + assert res.json['data']['id'] == unfinished_version._id + + def test_permission_error(self): + # No auth returns 401 + res = self.app.post_json_api(self.post_mod_version_create_url, auth=None, expect_errors=True) + assert res.status_code == 401 + # READ returns 403 + user_read = AuthUserFactory() + self.post_mod_preprint.add_contributor(user_read, permissions.READ) + res = self.app.post_json_api(self.post_mod_version_create_url, auth=user_read.auth, expect_errors=True) + assert res.status_code == 403 + # WRITE returns 403 + user_write = AuthUserFactory() + self.post_mod_preprint.add_contributor(user_write, permissions.WRITE) + res = self.app.post_json_api(self.post_mod_version_create_url, auth=user_write.auth, expect_errors=True) + assert res.status_code == 403 diff --git a/api_tests/share/_utils.py b/api_tests/share/_utils.py index a04808cac3c..2d974b75ccf 100644 --- a/api_tests/share/_utils.py +++ b/api_tests/share/_utils.py @@ -106,7 +106,7 @@ def expect_preprint_ingest_request(mock_share_responses, preprint, *, delete=Fal # and postcommit-task handling (so on_preprint_updated actually runs) with expect_ingest_request( mock_share_responses, - preprint._id, + preprint.get_guid()._id, token=preprint.provider.access_token, delete=delete, count=count, diff --git a/api_tests/users/views/test_user_actions.py b/api_tests/users/views/test_user_actions.py index 1236f70cc23..568574f5417 100644 --- a/api_tests/users/views/test_user_actions.py +++ b/api_tests/users/views/test_user_actions.py @@ -170,7 +170,6 @@ def test_bad_requests(self, app, url, preprint, provider, moderator): ('initial', 'accept'), ('initial', 'edit_comment'), ('initial', 'reject'), - ('pending', 'submit'), ('rejected', 'reject'), ('rejected', 'submit'), ], diff --git a/api_tests/wb/views/test_wb_hooks.py b/api_tests/wb/views/test_wb_hooks.py index 471af387396..20c09b14e69 100644 --- a/api_tests/wb/views/test_wb_hooks.py +++ b/api_tests/wb/views/test_wb_hooks.py @@ -813,6 +813,7 @@ def test_copy_checked_out_file_in_folder(self, app, root_node, user, folder, fol 'name': folder_two.name, } }) + res = app.post_json(copy_url, signed_payload, expect_errors=True) assert res.status_code == 201 diff --git a/framework/exceptions/__init__.py b/framework/exceptions/__init__.py index a2b076490f6..328dbd1fe13 100644 --- a/framework/exceptions/__init__.py +++ b/framework/exceptions/__init__.py @@ -110,3 +110,8 @@ class TemplateHTTPError(HTTPError): def __init__(self, code, message=None, redirect_url=None, data=None, template=None): self.template = template super().__init__(code, message, redirect_url, data) + +class UnpublishedPendingPreprintVersionExists(FrameworkError): + """Raised if an unpublished pending preprint version exists + """ + pass diff --git a/osf/external/spam/tasks.py b/osf/external/spam/tasks.py index 0f5d2fbd7f3..e00df54d237 100644 --- a/osf/external/spam/tasks.py +++ b/osf/external/spam/tasks.py @@ -40,10 +40,10 @@ def reclassify_domain_references(notable_domain_id, current_note, previous_note) def _check_resource_for_domains(guid, content): from osf.models import Guid, NotableDomain, DomainReference - guid = Guid.load(guid) - if not guid: + resource, _ = Guid.load_referent(guid) + if not resource: return f'{guid} not found' - resource = guid.referent + spammy_domains = [] referrer_content_type = ContentType.objects.get_for_model(resource) for domain, note in _extract_domains(content): diff --git a/osf/management/commands/check_crossref_dois.py b/osf/management/commands/check_crossref_dois.py index 264ec695593..f44655b49a9 100644 --- a/osf/management/commands/check_crossref_dois.py +++ b/osf/management/commands/check_crossref_dois.py @@ -1,18 +1,17 @@ +from datetime import timedelta import logging import requests -from datetime import timedelta - -from framework.celery_tasks import app as celery_app -from website import settings -from website import mails -from django.utils import timezone -from django.core.management.base import BaseCommand - import django +from django.core.management.base import BaseCommand +from django.utils import timezone django.setup() -from osf.models import Preprint +from framework import sentry +from framework.celery_tasks import app as celery_app +from osf.models import Guid, Preprint +from website import mails, settings + logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) @@ -27,6 +26,19 @@ def pop_slice(lis, n): del lis[:n] return tem +def create_dois_locally(): + """ + This script creates identifiers for preprints which have pending DOI in local environment. + """ + preprints_with_pending_doi = Preprint.objects.filter( + preprint_doi_created__isnull=True, + is_published=True + ) + + for preprint in preprints_with_pending_doi: + client = preprint.get_doi_client() + doi = client.build_doi(preprint=preprint) if client else None + preprint.set_identifier_values(doi, save=True) def check_crossref_dois(dry_run=True): """ @@ -53,8 +65,13 @@ def check_crossref_dois(dry_run=True): pending_dois = [] for preprint in preprint_batch: - prefix = preprint.provider.doi_prefix - pending_dois.append(f'doi:{settings.DOI_FORMAT.format(prefix=prefix, guid=preprint._id)}') + doi_prefix = preprint.provider.doi_prefix + if not doi_prefix: + sentry.log_message(f'Preprint [_id={preprint._id}] has been skipped for CrossRef DOI Check ' + f'since the provider [_id={preprint.provider._id}] has invalid DOI Prefix ' + f'[doi_prefix={doi_prefix}]') + continue + pending_dois.append(f'doi:{settings.DOI_FORMAT.format(prefix=doi_prefix, guid=preprint._id)}') url = '{}works?filter={}'.format(settings.CROSSREF_JSON_API_URL, ','.join(pending_dois)) @@ -62,18 +79,29 @@ def check_crossref_dois(dry_run=True): resp = requests.get(url) resp.raise_for_status() except requests.exceptions.HTTPError as exc: - logger.error(f'Could not contact crossref to check for DOIs, response returned with exception {exc}') - raise exc + sentry.log_message(f'Could not contact crossref to check for DOIs, response returned with exception {exc}') + continue preprints_response = resp.json()['message']['items'] for preprint in preprints_response: - guid = preprint['DOI'].split('/')[-1] - pending_preprint = preprints_with_pending_dois.get(guids___id=guid) + preprint__id = preprint['DOI'].split('/')[-1] + base_guid, version = Guid.split_guid(preprint__id) + if not base_guid or not version: + sentry.log_message(f'[Skipped] Preprint [_id={preprint__id}] returned by CrossRef API has invalid _id') + continue + pending_preprint = preprints_with_pending_dois.filter( + versioned_guids__guid___id=base_guid, + versioned_guids__version=version, + ).first() + if not pending_preprint: + sentry.log_message(f'[Skipped] Preprint [_id={preprint__id}] returned by CrossRef API is not found.') + continue if not dry_run: + logger.debug(f'Set identifier for {pending_preprint._id}') pending_preprint.set_identifier_values(preprint['DOI'], save=True) else: - logger.info('DRY RUN') + logger.info(f'DRY RUN: Set identifier for {pending_preprint._id}') def report_stuck_dois(dry_run=True): diff --git a/osf/management/commands/migrate_non_versioned_preprints.py b/osf/management/commands/migrate_non_versioned_preprints.py new file mode 100644 index 00000000000..f9283a69e20 --- /dev/null +++ b/osf/management/commands/migrate_non_versioned_preprints.py @@ -0,0 +1,82 @@ +from django.core.management.base import BaseCommand +from django.apps import apps +from tqdm import tqdm + + +class Command(BaseCommand): + """Migrate non-versioned preprints to versioned using guid versions. + For each preprint, it creates an entry in the GuidVersionsThrough table as version 1. + """ + + help = 'Migrate non-versioned preprints to versioned using guid versions.' + + def add_arguments(self, parser): + + parser.add_argument( + '--dry_run', + action='store_true', + dest='dry_run', + help='Run the command without making changes to the database (default: True).', + ) + + parser.add_argument( + '--batch_size', + type=int, + default=100, + help='Batch size for processing preprints (default: 100).', + ) + + def handle(self, *args, **options): + + dry_run = options['dry_run'] + batch_size = options['batch_size'] + if dry_run: + self.stdout.write(self.style.WARNING('This is a DRY_RUN pass!')) + else: + self.stdout.write(self.style.SUCCESS('Migrate started!')) + + ContentType = apps.get_model('contenttypes', 'ContentType') + GuidVersionsThrough = apps.get_model('osf', 'GuidVersionsThrough') + Preprint = apps.get_model('osf', 'Preprint') + + content_type_id = ContentType.objects.get_for_model(Preprint).id + qs = Preprint.objects.filter(versioned_guids__isnull=True).order_by('id') + if not qs.exists(): + self.stdout.write(self.style.WARNING('No preprints to migrate!')) + return + first_id = qs.first().id + last_id = qs.last().id + + vq_list = [] + total_migrated = 0 + p_batch_ids = [[x, x + batch_size - 1] for x in range(first_id, last_id + 1, batch_size)] + + for ids in tqdm(p_batch_ids, desc='Processing', unit='batch'): + preprints_list = Preprint.objects.filter(id__range=ids) + for preprint in preprints_list: + guid = preprint.guids.first() + if not guid: + if dry_run: + self.stdout.write( + self.style.ERROR(f'Preprint object [pk={preprint.pk}] will be skipped due to missing guid.') + ) + else: + if not guid.versions.exists(): + vq_list.append( + GuidVersionsThrough( + object_id=preprint.id, + version=1, + content_type_id=content_type_id, + guid_id=guid.id + ) + ) + if vq_list: + if not dry_run: + GuidVersionsThrough.objects.bulk_create(vq_list, batch_size=len(vq_list)) + total_migrated += len(vq_list) + vq_list = [] + success_message = f'Migration completed successfully! [{total_migrated}] preprints have been migrated.' + if dry_run: + self.stdout.write(self.style.WARNING(f'DRY_RUN: {success_message}')) + else: + self.stdout.write(self.style.SUCCESS(success_message)) diff --git a/osf/migrations/0025_versioned_guid_for_preprints_doi_versioning.py b/osf/migrations/0025_versioned_guid_for_preprints_doi_versioning.py new file mode 100644 index 00000000000..b2e2316aef9 --- /dev/null +++ b/osf/migrations/0025_versioned_guid_for_preprints_doi_versioning.py @@ -0,0 +1,42 @@ +# Generated by Django 4.2.15 on 2024-12-12 15:10 + +import django.contrib.postgres.fields +from django.db import migrations, models +import django.db.models.deletion +import django_extensions.db.fields +import osf.models.base +import osf.utils.fields + + +class Migration(migrations.Migration): + + dependencies = [ + ('contenttypes', '0002_remove_content_type_name'), + ('osf', '0024_institution_link_to_external_reports_archive'), + ] + + operations = [ + migrations.AlterField( + model_name='notificationdigest', + name='node_lineage', + field=django.contrib.postgres.fields.ArrayField(base_field=models.CharField(max_length=31), size=None), + ), + migrations.CreateModel( + name='GuidVersionsThrough', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('modified', django_extensions.db.fields.ModificationDateTimeField(auto_now=True, verbose_name='modified')), + ('created', osf.utils.fields.NonNaiveDateTimeField(auto_now_add=True, db_index=True)), + ('object_id', models.PositiveIntegerField(blank=True, null=True)), + ('version', models.PositiveIntegerField(blank=True, null=True)), + ('is_rejected', models.BooleanField(default=False)), + ('content_type', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='contenttypes.contenttype')), + ('guid', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='versions', to='osf.guid')), + ], + bases=(models.Model, osf.models.base.QuerySetExplainMixin), + ), + migrations.AddConstraint( + model_name='guidversionsthrough', + constraint=models.UniqueConstraint(fields=('guid', 'version'), name='unique_guid_version'), + ), + ] diff --git a/osf/models/__init__.py b/osf/models/__init__.py index cad31ea323f..652d94942a9 100644 --- a/osf/models/__init__.py +++ b/osf/models/__init__.py @@ -16,6 +16,8 @@ from .base import ( BlackListGuid, Guid, + GuidVersionsThrough, + VersionedGuidMixin, ) from .brand import Brand from .cedar_metadata import CedarMetadataRecord, CedarMetadataTemplate diff --git a/osf/models/base.py b/osf/models/base.py index e8b4ff38ca7..1d01ede4032 100644 --- a/osf/models/base.py +++ b/osf/models/base.py @@ -1,24 +1,24 @@ +from collections.abc import Iterable import logging import random -from collections.abc import Iterable import bson -from django.contrib.contenttypes.fields import (GenericForeignKey, - GenericRelation) +from django.contrib.contenttypes.fields import GenericForeignKey, GenericRelation from django.contrib.contenttypes.models import ContentType from django.core.exceptions import MultipleObjectsReturned from django.core.exceptions import ValidationError as DjangoValidationError from django.db import connections, models -from django.db.models import ForeignKey +from django.db.models import ForeignKey, UniqueConstraint from django.db.models.query import QuerySet from django.db.models.signals import post_save from django.dispatch import receiver from django_extensions.db.models import TimeStampedModel -from website import settings as website_settings -from osf.utils.caching import cached_property +from framework import sentry from osf.exceptions import ValidationError +from osf.utils.caching import cached_property from osf.utils.fields import LowercaseCharField, NonNaiveDateTimeField +from website import settings as website_settings ALPHABET = '23456789abcdefghjkmnpqrstuvwxyz' @@ -57,10 +57,10 @@ def coerce_guid(maybe_guid, create_if_needed=False): raise InvalidGuid(f'guid does not exist ({maybe_guid})') return guid if isinstance(maybe_guid, str): - try: - return Guid.objects.get(_id=maybe_guid) - except Guid.DoesNotExist: + guid = Guid.load(maybe_guid) + if not guid: raise InvalidGuid(f'guid does not exist ({maybe_guid})') + return guid raise InvalidGuid(f'cannot coerce {type(maybe_guid)} ({maybe_guid}) into Guid') @@ -188,7 +188,6 @@ def get_semantic_iris(self) -> Iterable[str]: yield from () # no semantic iris unless implemented in a subclass -# TODO: Rename to Identifier? class Guid(BaseModel): """Stores either a short guid or long object_id for any model that inherits from BaseIDMixin. Each ID field (e.g. 'guid', 'object_id') MUST have an accompanying method, named with @@ -202,20 +201,77 @@ class Guid(BaseModel): referent = GenericForeignKey() content_type = models.ForeignKey(ContentType, null=True, blank=True, on_delete=models.CASCADE) object_id = models.PositiveIntegerField(null=True, blank=True) + created = NonNaiveDateTimeField(db_index=True, auto_now_add=True) def __repr__(self): return f'' - # Override load in order to load by GUID @classmethod - def load(cls, data, select_for_update=False): + def split_guid(cls, guid_str): + """Check if the guid str contains version and return a tuple that contains the base guid str and the version. + """ + if not guid_str: + return None, None + guid_parts = guid_str.lower().split(VersionedGuidMixin.GUID_VERSION_DELIMITER) + base_guid_str = guid_parts[0] + version = guid_parts[1] if len(guid_parts) > 1 else None + return base_guid_str, version + + @classmethod + def load(cls, data, select_for_update=False, ignore_not_found=False): + """Override load in order to load by Guid. + + Update with versioned Guid: if the guid str stored in data is versioned, only the base guid str is used. This + is the expected design because base guid str remains a valid one, and it always refers to the latest version. + + If `ignore_not_found` is True, then don't log to sentry. This is used in `website.views.resolve_guid()`. + """ + if not data: + return None + base_guid_str, version = cls.split_guid(data) try: - return cls.objects.get(_id=data) if not select_for_update else cls.objects.filter( - _id=data).select_for_update().get() + if not select_for_update: + return cls.objects.get(_id=base_guid_str) + return cls.objects.filter(_id=base_guid_str).select_for_update().get() except cls.DoesNotExist: + if not ignore_not_found: + sentry.log_message(f'Object not found from base guid: ' + f'[data={data}, guid={base_guid_str}, version={version}]') return None + @classmethod + def load_referent(cls, guid_str, ignore_not_found=False): + """Find and return the referent from a given guid str. + """ + if not guid_str: + return None, None + base_guid_str, version = cls.split_guid(guid_str) + base_guid_obj = cls.load(base_guid_str, ignore_not_found=ignore_not_found) + if not base_guid_obj: + return None, None + # Handles versioned guid str + if version: + if base_guid_obj.is_versioned: + versioned_obj_qs = base_guid_obj.versions.filter(version=version) + if not versioned_obj_qs.exists(): + sentry.log_message(f'Version not found for versioned guid: [guid={base_guid_str}, version={version}]') + return None, None + else: + sentry.log_message(f'The guid object does not support versioning: [guid={base_guid_str}, version={version}]') + return None, None + referent = versioned_obj_qs.first().referent + return referent, referent.version + # Handles guid str without version + referent = base_guid_obj.referent + # If the guid str doesn't have version but supports versioning, we need to check and return the version + version = referent.version if hasattr(referent, 'version') else None + return referent, version + + @property + def is_versioned(self): + return self.versions.exists() + class Meta: ordering = ['-created'] get_latest_by = 'created' @@ -224,6 +280,24 @@ class Meta: ) +class GuidVersionsThrough(BaseModel): + """Stores versions of versioned guid obj. It refers to both the versioned obj (referent) and the base guid obj. + """ + + created = NonNaiveDateTimeField(db_index=True, auto_now_add=True) + referent = GenericForeignKey() + content_type = models.ForeignKey(ContentType, null=True, blank=True, on_delete=models.CASCADE) + object_id = models.PositiveIntegerField(null=True, blank=True) + guid = models.ForeignKey('Guid', related_name='versions', on_delete=models.CASCADE) + version = models.PositiveIntegerField(null=True, blank=True) + is_rejected = models.BooleanField(default=False) + + class Meta: + constraints = [ + UniqueConstraint(fields=['guid', 'version'], name='unique_guid_version') + ] + + class BlackListGuid(BaseModel): id = models.AutoField(primary_key=True) guid = LowercaseCharField(max_length=255, unique=True, db_index=True) @@ -443,16 +517,119 @@ class Meta: abstract = True +class VersionedGuidMixin(GuidMixin): + """Inherits from `GuidMixin` to support objects that use the `GuidVersionsThrough` table for versioning. + """ + + class Meta: + abstract = True + + INITIAL_VERSION_NUMBER = 1 + GUID_VERSION_DELIMITER = '_v' + + versioned_guids = GenericRelation('GuidVersionsThrough', related_name='referent', related_query_name='referents') + + @cached_property + def _id(self): + try: + versioned_guid = self.versioned_guids + if not versioned_guid.exists(): + # This can happen during the gap AFTER preprint version is created and BEFORE versioned guid is created. + # This happens every time recursively inside `.super().save()` when the `preprint.save()` is called for + # the first time during preprint creation and new preprint version creation. + sentry.log_message( + f'`self.versioned_guids` does not exist: [self={self.pk}, type={type(self).__name__}]' + ) + return None + guid = versioned_guid.first().guid + version = versioned_guid.first().version + except IndexError as e: + sentry.log_exception(e) + return None + return f'{guid._id}{VersionedGuidMixin.GUID_VERSION_DELIMITER}{version}' + + @_id.setter + def _id(self, value): + # TODO: should we enable setter for `_id`, which we found some usage in unit tests + pass + + _primary_key = _id + + @cached_property + def version(self): + # Once assigned, version number never changes + return self.versioned_guids.first().version + + @classmethod + def load(cls, guid_str, select_for_update=False): + """Override load in order to load by Versioned Guid. It finds and returns the versioned object based on the + base guid str and the version in the guid str. If the guid str does not have version, it returns the object + referred by the base guid obj. + """ + if not guid_str: + return None + try: + base_guid_str, version = Guid.split_guid(guid_str) + # Version exists + if version: + if not select_for_update: + return cls.objects.get(versioned_guids__guid___id=base_guid_str, versioned_guids__version=version) + return cls.objects.filter( + versioned_guids__guid___id=base_guid_str, + versioned_guids__version=version + ).select_for_update().get() + # Version does not exists + if not select_for_update: + return cls.objects.filter(guids___id=base_guid_str).first() + return cls.objects.filter(guids___id=guid_str).select_for_update().get() + except cls.DoesNotExist: + sentry.log_message(f'Object not found for VersionedGuidMixin: [guid_str={guid_str}]') + return None + except cls.MultipleObjectsReturned: + return None + + def get_guid(self): + """A helper for getting the base guid + """ + return self.versioned_guids.first().guid + + def get_semantic_iri(self): + """Override `get_semantic_iri()` in `GuidMixin` so that all versions of the same object have the same semantic + iri using only the base guid str. + """ + _osfid = self.get_guid()._id + if not _osfid: + raise ValueError(f'no osfid for {self} (cannot build semantic iri)') + return osfid_iri(_osfid) + + def update_search(self, skip_share=False): + """Subclasses must implement `update_search()` with kwarg `skip_share=False`.""" + raise NotImplementedError() + @receiver(post_save) -def ensure_guid(sender, instance, created, **kwargs): +def ensure_guid(sender, instance, **kwargs): + """Generate guid if it doesn't exist for subclasses of GuidMixin except for subclasses of VersionedGuidMixin + + Note: must have **kwargs though not used since signal receivers must accept keyword arguments. + """ if not issubclass(sender, GuidMixin): return False - existing_guids = Guid.objects.filter(object_id=instance.pk, - content_type=ContentType.objects.get_for_model(instance)) + if issubclass(sender, VersionedGuidMixin): + # For classes that support version using VersionedGuidMixin, the base guid object must be generated manually. + # Only the initial or the latest version is referred to by the base guid in the Guid table. All versions have + # their "versioned" guid in the GuidVersionsThrough table. + return False + existing_guids = Guid.objects.filter( + object_id=instance.pk, + content_type=ContentType.objects.get_for_model(instance) + ) has_cached_guids = hasattr(instance, '_prefetched_objects_cache') and 'guids' in instance._prefetched_objects_cache if not existing_guids.exists(): # Clear query cache of instance.guids if has_cached_guids: del instance._prefetched_objects_cache['guids'] - Guid.objects.create(object_id=instance.pk, content_type=ContentType.objects.get_for_model(instance), - _id=generate_guid(instance.__guid_min_length__)) + Guid.objects.create( + object_id=instance.pk, + content_type=ContentType.objects.get_for_model(instance), + _id=generate_guid(instance.__guid_min_length__) + ) diff --git a/osf/models/mixins.py b/osf/models/mixins.py index 17c35db04a6..aa6c43fa525 100644 --- a/osf/models/mixins.py +++ b/osf/models/mixins.py @@ -297,8 +297,8 @@ class AffiliatedInstitutionMixin(models.Model): affiliated_institutions = models.ManyToManyField('Institution', related_name='nodes') - def add_affiliated_institution(self, inst, user, log=True): - if not user.is_affiliated_with_institution(inst): + def add_affiliated_institution(self, inst, user, log=True, ignore_user_affiliation=False): + if not user.is_affiliated_with_institution(inst) and not ignore_user_affiliation: raise UserNotAffiliatedError(f'User is not affiliated with {inst.name}') if not self.is_affiliated_with_institution(inst): self.affiliated_institutions.add(inst) @@ -1104,7 +1104,7 @@ def assert_subject_format(self, subj_list, expect_list, error_msg): if (expect_list and not is_list) or (not expect_list and is_list): raise ValidationValueError(f'Subjects are improperly formatted. {error_msg}') - def set_subjects(self, new_subjects, auth, add_log=True): + def set_subjects(self, new_subjects, auth, add_log=True, skip_share=False): """ Helper for setting M2M subjects field from list of hierarchies received from UI. Only authorized admins may set subjects. @@ -1135,9 +1135,13 @@ def set_subjects(self, new_subjects, auth, add_log=True): self.save() if hasattr(self, 'update_search'): - self.update_search() + from osf.models.base import VersionedGuidMixin + if isinstance(self, VersionedGuidMixin): + self.update_search(skip_share=skip_share) + else: + self.update_search() - def set_subjects_from_relationships(self, subjects_list, auth, add_log=True): + def set_subjects_from_relationships(self, subjects_list, auth, add_log=True, skip_share=False): """ Helper for setting M2M subjects field from list of flattened subjects received from UI. Only authorized admins may set subjects. @@ -1162,7 +1166,11 @@ def set_subjects_from_relationships(self, subjects_list, auth, add_log=True): self.save() if hasattr(self, 'update_search'): - self.update_search() + from osf.models.base import VersionedGuidMixin + if isinstance(self, VersionedGuidMixin): + self.update_search(skip_share=skip_share) + else: + self.update_search() def map_subjects_between_providers(self, old_provider, new_provider, auth=None): """ @@ -1383,11 +1391,14 @@ def add_contributor(self, contributor, permissions=None, visible=True, ) if save: self.save() - if self._id and contrib_to_add: - project_signals.contributor_added.send(self, - contributor=contributor, - auth=auth, email_template=send_email, permissions=permissions) + project_signals.contributor_added.send( + self, + contributor=contributor, + auth=auth, + email_template=send_email, + permissions=permissions + ) # enqueue on_node_updated/on_preprint_updated to update DOI metadata when a contributor is added if getattr(self, 'get_identifier_value', None) and self.get_identifier_value('doi'): diff --git a/osf/models/notifications.py b/osf/models/notifications.py index 961ec1d47c8..86be3424832 100644 --- a/osf/models/notifications.py +++ b/osf/models/notifications.py @@ -104,4 +104,4 @@ class NotificationDigest(ObjectIDMixin, BaseModel): event = models.CharField(max_length=50) message = models.TextField() # TODO: Could this be a m2m with or without an order field? - node_lineage = ArrayField(models.CharField(max_length=5)) + node_lineage = ArrayField(models.CharField(max_length=31)) diff --git a/osf/models/preprint.py b/osf/models/preprint.py index 87c337532cc..c1109c00c04 100644 --- a/osf/models/preprint.py +++ b/osf/models/preprint.py @@ -4,7 +4,7 @@ import re from dirtyfields import DirtyFieldsMixin -from django.db import models +from django.db import models, IntegrityError from django.db.models import Q from django.utils import timezone from django.contrib.contenttypes.fields import GenericRelation @@ -15,8 +15,9 @@ from django.contrib.contenttypes.models import ContentType from django.db.models.signals import post_save +from framework import sentry from framework.auth import Auth -from framework.exceptions import PermissionsError +from framework.exceptions import PermissionsError, UnpublishedPendingPreprintVersionExists from framework.auth import oauth_scopes from .subject import Subject @@ -42,7 +43,7 @@ from website import settings, mails from website.preprints.tasks import update_or_enqueue_on_preprint_updated -from .base import BaseModel, GuidMixin, GuidMixinQuerySet +from .base import BaseModel, Guid, GuidVersionsThrough, GuidMixinQuerySet, VersionedGuidMixin from .identifiers import IdentifierMixin, Identifier from .mixins import TaxonomizableMixin, ContributorMixin, SpamOverrideMixin, TitleMixin, DescriptionMixin from addons.osfstorage.models import OsfStorageFolder, Region, BaseFileNode, OsfStorageFile @@ -51,10 +52,13 @@ from osf.exceptions import ( PreprintStateError, InvalidTagError, - TagNotFoundError + TagNotFoundError, + UserStateError, + ValidationValueError, ) from django.contrib.postgres.fields import ArrayField from api.share.utils import update_share +from api.providers.workflows import Workflows logger = logging.getLogger(__name__) @@ -106,11 +110,25 @@ def can_view(self, base_queryset=None, user=None, allow_contribs=True, public_on # TODO: Remove need for .distinct using correct subqueries return ret.distinct('id', 'created') if include_non_public else ret +class PublishedPreprintManager(PreprintManager): + def get_queryset(self): + return super().get_queryset().filter(is_published=True) + +class RejectedPreprintManager(PreprintManager): + def get_queryset(self): + return super().get_queryset().filter(actions__to_state='rejected') -class Preprint(DirtyFieldsMixin, GuidMixin, IdentifierMixin, ReviewableMixin, BaseModel, TitleMixin, DescriptionMixin, +class EverPublishedPreprintManager(PreprintManager): + def get_queryset(self): + return super().get_queryset().filter(date_published__isnull=False) + +class Preprint(DirtyFieldsMixin, VersionedGuidMixin, IdentifierMixin, ReviewableMixin, BaseModel, TitleMixin, DescriptionMixin, Loggable, Taggable, ContributorMixin, GuardianMixin, SpamOverrideMixin, TaxonomizableMixin, AffiliatedInstitutionMixin): objects = PreprintManager() + published_objects = PublishedPreprintManager() + ever_published_objects = EverPublishedPreprintManager() + rejected_objects = RejectedPreprintManager() # Preprint fields that trigger a check to the spam filter on save SPAM_CHECK_FIELDS = { 'title', @@ -269,6 +287,173 @@ class Meta: def __unicode__(self): return '{} ({} preprint) (guid={}){}'.format(self.title, 'published' if self.is_published else 'unpublished', self._id, ' with supplemental files on ' + self.node.__unicode__() if self.node else '') + @classmethod + def create(cls, provider, title, creator, description): + """Customized creation process to support preprint versions and versioned guid. + """ + # Step 1: Create the preprint obj + preprint = cls( + provider=provider, + title=title, + creator=creator, + description=description, + ) + preprint.save(guid_ready=False) + # Step 2: Create the base guid obj + base_guid_obj = Guid.objects.create() + base_guid_obj.referent = preprint + base_guid_obj.object_id = preprint.pk + base_guid_obj.content_type = ContentType.objects.get_for_model(preprint) + base_guid_obj.save() + # Step 3: Create a new entry in the `GuidVersionsThrough` table to store version information + versioned_guid = GuidVersionsThrough( + referent=base_guid_obj.referent, + object_id=base_guid_obj.object_id, + content_type=base_guid_obj.content_type, + version=VersionedGuidMixin.INITIAL_VERSION_NUMBER, + guid=base_guid_obj + ) + versioned_guid.save() + preprint.save(guid_ready=True, first_save=True) + + return preprint + + def get_last_not_rejected_version(self): + """Get the last version that is not rejected. + """ + return self.get_guid().versions.filter(is_rejected=False).order_by('-version').first().referent + + def has_unpublished_pending_version(self): + """Check if preprint has pending unpublished version. + Note: use `.check_unfinished_or_unpublished_version()` if checking both types + """ + last_not_rejected_version = self.get_last_not_rejected_version() + return not last_not_rejected_version.date_published and last_not_rejected_version.machine_state == 'pending' + + def has_initiated_but_unfinished_version(self): + """Check if preprint has initiated but unfinished version. + Note: use `.check_unfinished_or_unpublished_version()` if checking both types + """ + last_not_rejected_version = self.get_last_not_rejected_version() + return not last_not_rejected_version.date_published and last_not_rejected_version.machine_state == 'initial' + + def check_unfinished_or_unpublished_version(self): + """Check and return the "initiated but unfinished version" and "unfinished or unpublished version". + """ + last_not_rejected_version = self.get_last_not_rejected_version() + if last_not_rejected_version.date_published: + return None, None + if last_not_rejected_version.machine_state == 'initial': + return last_not_rejected_version, None + if last_not_rejected_version.machine_state == 'pending': + return None, last_not_rejected_version + return None, None + + @classmethod + def create_version(cls, create_from_guid, auth): + """Create a new version for a given preprint. `create_from_guid` can be any existing versions of the preprint + but `create_version` always finds the latest version and creates a new version from it. In addition, this + creates an "incomplete" new preprint version object using the model class and returns both the new object and + the data to be updated. The API, more specifically `PreprintCreateVersionSerializer` must call `.update()` to + "completely finish" the new preprint version object creation. + """ + + # Use `Guid.load()` instead of `VersionedGuid.load()` to retrieve the base guid obj, which always points to the + # latest (ever-published) version. + guid_obj = Guid.load(create_from_guid) + latest_version = cls.load(guid_obj._id) + if not latest_version: + sentry.log_message(f'Preprint not found: [guid={guid_obj._id}, create_from_guid={create_from_guid}]') + return None, None + if not latest_version.has_permission(auth.user, ADMIN): + sentry.log_message(f'ADMIN permission for the latest version is required to create a new version: ' + f'[user={auth.user._id}, guid={guid_obj._id}, latest_version={latest_version._id}]') + raise PermissionsError + unfinished_version, unpublished_version = latest_version.check_unfinished_or_unpublished_version() + if unpublished_version: + sentry.log_message('Failed to create a new version due to unpublished pending version already exists: ' + f'[version={unpublished_version.version}, ' + f'_id={unpublished_version._id}, ' + f'state={unpublished_version.machine_state}].') + raise UnpublishedPendingPreprintVersionExists + if unfinished_version: + sentry.log_message(f'Use existing initiated but unfinished version instead of creating a new one: ' + f'[version={unfinished_version.version}, ' + f'_id={unfinished_version._id}, ' + f'state={unfinished_version.machine_state}].') + return unfinished_version, None + + # Note: version number bumps from the last version number instead of the latest version number + last_version_number = guid_obj.versions.order_by('-version').first().version + + # Prepare the data to clone/update + data_to_update = { + 'subjects': [el for el in latest_version.subjects.all().values_list('_id', flat=True)], + 'tags': latest_version.tags.all().values_list('name', flat=True), + 'original_publication_date': latest_version.original_publication_date, + 'custom_publication_citation': latest_version.custom_publication_citation, + 'article_doi': latest_version.article_doi, 'has_coi': latest_version.has_coi, + 'conflict_of_interest_statement': latest_version.conflict_of_interest_statement, + 'has_data_links': latest_version.has_data_links, 'why_no_data': latest_version.why_no_data, + 'data_links': latest_version.data_links, + 'has_prereg_links': latest_version.has_prereg_links, + 'why_no_prereg': latest_version.why_no_prereg, 'prereg_links': latest_version.prereg_links, + } + if latest_version.license: + data_to_update['license_type'] = latest_version.license.node_license + data_to_update['license'] = { + 'copyright_holders': latest_version.license.copyright_holders, + 'year': latest_version.license.year + } + if latest_version.node: + data_to_update['node'] = latest_version.node + + # Create a preprint obj for the new version + preprint = cls( + provider=latest_version.provider, + title=latest_version.title, + creator=auth.user, + description=latest_version.description, + ) + preprint.save(guid_ready=False) + # Create a new entry in the `GuidVersionsThrough` table to store version information, which must happen right + # after the first `.save()` of the new preprint version object, which enables `preprint._id` to be computed. + guid_version = GuidVersionsThrough( + referent=preprint, + object_id=guid_obj.object_id, + content_type=guid_obj.content_type, + version=last_version_number + 1, + guid=guid_obj + ) + guid_version.save() + preprint.save(guid_ready=True, first_save=True) + + # Add contributors + for contributor in latest_version.contributor_set.exclude(user=preprint.creator): + try: + preprint.add_contributor( + contributor.user, + permissions=contributor.permission, + visible=contributor.visible, + save=True + ) + except (ValidationValueError, UserStateError) as e: + sentry.log_exception(e) + sentry.log_message(f'Contributor was not added to new preprint version due to error: ' + f'[preprint={preprint._id}, user={contributor.user._id}]') + # Add affiliated institutions + for institution in latest_version.affiliated_institutions.all(): + preprint.add_affiliated_institution(institution, auth.user, ignore_user_affiliation=True) + + # Update Guid obj to point to the new version if there is no moderation + if not preprint.provider.reviews_workflow: + guid_obj.referent = preprint + guid_obj.object_id = preprint.pk + guid_obj.content_type = ContentType.objects.get_for_model(preprint) + guid_obj.save() + + return preprint, data_to_update + @property def is_deleted(self): return bool(self.deleted) @@ -476,6 +661,15 @@ def csl(self): # formats node information into CSL format for citation parsing return csl + @property + def is_latest_version(self): + return self.guids.exists() + + def get_preprint_versions(self): + guids = self.versioned_guids.first().guid.versions.all() + preprint_versions = Preprint.objects.filter(id__in=[vg.object_id for vg in guids]).order_by('-id') + return preprint_versions + def web_url_for(self, view_name, _absolute=False, _guid=False, *args, **kwargs): return web_url_for(view_name, pid=self._id, _absolute=_absolute, _guid=_guid, *args, **kwargs) @@ -639,7 +833,33 @@ def get_doi_client(self): return None def save(self, *args, **kwargs): - first_save = not bool(self.pk) + """Customize preprint save process, which has three steps. + + 1. Initial: this save happens before guid and versioned guid are created for the preprint; this save + creates the pk; after this save, none of `guids`, `versioned_guids` or `._id` is available. + 2. First: this save happens and must happen right after versioned guid have been created; this is the + same "first save" as it was before preprint became versioned; the only change is that `pk` already exists + 3. This is the case for all subsequent saves after initial and first save. + + Note: When creating a preprint or new version , must use Preprint.create() or Preprint.create_version() + respectively, which handles the save process automatically. + """ + initial_save = not kwargs.pop('guid_ready', True) + if initial_save: + # Save when guid and versioned guid are not ready + return super().save(*args, **kwargs) + + # Preprint must have PK and _id set before continue + if not bool(self.pk): + err_msg = 'Preprint must have pk!' + sentry.log_message(err_msg) + raise IntegrityError(err_msg) + if not self._id: + err_msg = 'Preprint must have _id!' + sentry.log_message(err_msg) + raise IntegrityError(err_msg) + + first_save = kwargs.pop('first_save', False) saved_fields = self.get_dirty_fields() or [] if not first_save and ('ever_public' in saved_fields and saved_fields['ever_public']): @@ -921,8 +1141,12 @@ def bulk_update_search(cls, preprints, index=None): logger.exception(e) log_exception(e) - def update_search(self): - update_share(self) + def update_search(self, skip_share=False): + """Update SHARE and OSF search. + """ + if not skip_share: + update_share(self) + from website import search try: search.search.update_preprint(self, bulk=False, async_update=True) @@ -1278,6 +1502,74 @@ def update_prereg_link_info(self, auth: Auth, prereg_link_info: str, log: bool = if save: self.save() + def run_submit(self, user): + """Override `ReviewableMixin`/`MachineableMixin`. + Run the 'submit' state transition and create a corresponding Action. + + Params: + user: The user triggering this transition. + """ + ret = super().run_submit(user=user) + provider = self.provider + reviews_workflow = provider.reviews_workflow + # Only post moderation is relevant for Preprint, and hybrid moderation is included for integrity purpose. + need_guid_update = any( + [ + reviews_workflow == Workflows.POST_MODERATION.value, + reviews_workflow == Workflows.HYBRID_MODERATION.value and + any([ + provider.get_group('moderator') in user.groups.all(), + provider.get_group('admin') in user.groups.all() + ]) + ] + ) + # Only update the base guid obj to refer to the new version 1) if the provider is post-moderation, or 2) if the + # provider is hybrid-moderation and if the user who submits the preprint is a moderator or admin. + if need_guid_update: + base_guid_obj = self.versioned_guids.first().guid + base_guid_obj.referent = self + base_guid_obj.object_id = self.pk + base_guid_obj.content_type = ContentType.objects.get_for_model(self) + base_guid_obj.save() + return ret + + def run_accept(self, user, comment, **kwargs): + """Override `ReviewableMixin`/`MachineableMixin`. + Run the 'accept' state transition and create a corresponding Action. + + Params: + user: The user triggering this transition. + comment: Text describing why. + """ + ret = super().run_accept(user=user, comment=comment, **kwargs) + reviews_workflow = self.provider.reviews_workflow + if reviews_workflow == Workflows.PRE_MODERATION.value or reviews_workflow == Workflows.HYBRID_MODERATION.value: + base_guid_obj = self.versioned_guids.first().guid + base_guid_obj.referent = self + base_guid_obj.object_id = self.pk + base_guid_obj.content_type = ContentType.objects.get_for_model(self) + base_guid_obj.save() + + versioned_guid = self.versioned_guids.first() + if versioned_guid.is_rejected: + versioned_guid.is_rejected = False + versioned_guid.save() + return ret + + def run_reject(self, user, comment): + """Override `ReviewableMixin`/`MachineableMixin`. + Run the 'reject' state transition and create a corresponding Action. + + Params: + user: The user triggering this transition. + comment: Text describing why. + """ + ret = super().run_reject(user=user, comment=comment) + versioned_guid = self.versioned_guids.first() + versioned_guid.is_rejected = True + versioned_guid.save() + return ret + @receiver(post_save, sender=Preprint) def create_file_node(sender, instance, **kwargs): if instance.root_folder: diff --git a/osf/models/spam.py b/osf/models/spam.py index 993039b1fcf..2905a98cdcf 100644 --- a/osf/models/spam.py +++ b/osf/models/spam.py @@ -190,6 +190,8 @@ def check_spam(self, user, saved_fields, request, save=False): pass def do_check_spam(self, author, author_email, content, request_headers): + from osf.models.preprint import Preprint + if self.is_hammy: return if self.is_spammy: @@ -200,9 +202,12 @@ def do_check_spam(self, author, author_email, content, request_headers): 'user_agent': request_headers.get('User-Agent'), 'referer': request_headers.get('Referer'), } - + if isinstance(self, Preprint): + guid__id = self._id + else: + guid__id = self.guids.first()._id check_resource_for_domains_postcommit( - self.guids.first()._id, + guid__id, content, ) diff --git a/osf/utils/machines.py b/osf/utils/machines.py index 8e4b8f07338..1b286f3cee3 100644 --- a/osf/utils/machines.py +++ b/osf/utils/machines.py @@ -123,7 +123,17 @@ def save_changes(self, ev): self.machineable.save() def resubmission_allowed(self, ev): - return self.machineable.provider.reviews_workflow == Workflows.PRE_MODERATION.value + """Allow resubmission 1) if the preprint uses the PRE_MODERATION workflow, or 2) if it uses the POST_MODERATION + workflow and is in a pending state. + """ + workflow = self.machineable.provider.reviews_workflow + result = any( + [ + workflow == Workflows.PRE_MODERATION.value, + workflow == Workflows.POST_MODERATION.value and self.machineable.machine_state == 'pending' + ] + ) + return result def perform_withdraw(self, ev): self.machineable.date_withdrawn = self.action.created if self.action is not None else timezone.now() diff --git a/osf_tests/factories.py b/osf_tests/factories.py index 0bd1664977d..c923c62e6b1 100644 --- a/osf_tests/factories.py +++ b/osf_tests/factories.py @@ -4,11 +4,11 @@ import datetime from random import randint from unittest import mock + from factory import SubFactory from factory.fuzzy import FuzzyDateTime, FuzzyAttribute, FuzzyChoice from unittest.mock import patch, Mock -import factory import pytz import factory.django from factory.django import DjangoModelFactory @@ -20,7 +20,6 @@ from django.db.utils import IntegrityError from faker import Factory, Faker from waffle.models import Flag, Sample, Switch - from website.notifications.constants import NOTIFICATION_TYPES from osf.utils import permissions from website.archiver import ARCHIVER_SUCCESS @@ -728,23 +727,52 @@ def _build(cls, target_class, *args, **kwargs): @classmethod def _create(cls, target_class, *args, **kwargs): + update_task_patcher = mock.patch('website.preprints.tasks.on_preprint_updated.si') update_task_patcher.start() + # Step 1: create prerpint, guid and versioned guid + instance = cls._build(target_class, *args, **kwargs) + instance.save(guid_ready=False) + # If use_guid is passed, use that guid instead of creating a new one + if use_guid := kwargs.pop('use_guid', None): + guid = use_guid + guid.referent = instance + guid.content_type = ContentType.objects.get_for_model(instance) + guid.object_id = instance.pk + guid.save() + else: + guid = models.Guid.objects.create( + referent=instance, + content_type=ContentType.objects.get_for_model(instance), + object_id=instance.pk, + ) + # If guid__id is passed, set the guid's id to that value + if guid__id := kwargs.pop('set_guid', None): + guid._id = guid__id + guid.save() + + models.GuidVersionsThrough.objects.create( + referent=instance, + content_type=ContentType.objects.get_for_model(instance), + object_id=instance.pk, + version=1, + guid=guid + ) + instance.save(guid_ready=True, first_save=True) + + # Step 2: finish preprint creation work flow finish = kwargs.pop('finish', True) set_doi = kwargs.pop('set_doi', True) + user = kwargs.pop('creator', None) or instance.creator is_published = kwargs.pop('is_published', True) - instance = cls._build(target_class, *args, **kwargs) file_size = kwargs.pop('file_size', 1337) - doi = kwargs.pop('doi', None) license_details = kwargs.pop('license_details', None) filename = kwargs.pop('filename', None) or 'preprint_file.txt' subjects = kwargs.pop('subjects', None) or [[SubjectFactory()._id]] instance.article_doi = doi - - user = kwargs.pop('creator', None) or instance.creator - instance.save() + machine_state = kwargs.pop('machine_state', 'initial') preprint_file = OsfStorageFile.create( target_object_id=instance.id, @@ -753,7 +781,7 @@ def _create(cls, target_class, *args, **kwargs): name=filename, materialized_path=f'/{filename}') - instance.machine_state = kwargs.pop('machine_state', 'initial') + instance.machine_state = machine_state preprint_file.save() from addons.osfstorage import settings as osfstorage_settings @@ -783,6 +811,111 @@ def _create(cls, target_class, *args, **kwargs): instance.save() return instance + # TODO: maybe we should use model's `create_version` and use api's update_data + @classmethod + def create_version(cls, create_from, creator=None, final_machine_state='accepted', is_published=True, set_doi=True): + + # Run a few checks + assert final_machine_state in ['pending', 'accepted', 'initial'] + assert create_from and not create_from.has_unpublished_pending_version() + guid_obj = create_from.guids.first() + latest_version = guid_obj.referent + assert latest_version.is_latest_version + last_version_number = guid_obj.versions.order_by('-version').first().version + if not creator: + creator = latest_version.creator + + update_task_patcher = mock.patch('website.preprints.tasks.on_preprint_updated.si') + update_task_patcher.start() + + # Create new version from the latest version + target_class = cls._meta.model + instance = cls._build( + target_class, + provider=latest_version.provider, + title=latest_version.title, + description=latest_version.description, + creator=creator + ) + instance.save(guid_ready=False) + models.GuidVersionsThrough.objects.create( + referent=instance, + content_type=ContentType.objects.get_for_model(instance), + object_id=instance.pk, + guid=guid_obj, + version=last_version_number + 1 + ) + instance.machine_state = 'initial' + instance.node = latest_version.node + instance.save(guid_ready=True, first_save=True) + + # Prepare and add file + filename = f'preprint_file_{last_version_number + 1}.txt' + preprint_file = OsfStorageFile.create( + target_object_id=instance.id, + target_content_type=ContentType.objects.get_for_model(instance), + path=f'/{filename}', + name=filename, + materialized_path=f'/{filename}' + ) + preprint_file.save() + auth = Auth(instance.creator) + instance.set_primary_file(preprint_file, auth=auth, save=True) + from addons.osfstorage import settings as osfstorage_settings + location = { + 'object': '06d80e', + 'service': 'cloud', + osfstorage_settings.WATERBUTLER_RESOURCE: 'osf', + } + metadata = { + 'size': 1357, + 'contentType': 'img/png' + } + preprint_file.create_version(creator, location, metadata=metadata).save() + + # Set relationships + contributor_list = latest_version.contributor_set.exclude( + user=latest_version.creator + ).values('visible', 'user_id', '_order') + for contributor in contributor_list: + instance.contributor_set.create(**{**contributor, 'preprint_id': instance.id}) + for institution in latest_version.affiliated_institutions.all(): + instance.add_affiliated_institution(institution, auth.user, ignore_user_affiliation=True) + + # Set other fields + # TODO: we should copy all fields + if latest_version.license: + license_details = { + 'id': latest_version.license.node_license.license_id, + 'copyrightHolders': latest_version.license.copyright_holders, + 'year': latest_version.license.year + } + instance.set_preprint_license(license_details, auth=auth) + subjects = [[el] for el in latest_version.subjects.all().values_list('_id', flat=True)] + instance.set_subjects(subjects, auth=auth) + instance.article_doi = latest_version.article_doi + instance.set_published(is_published, auth=auth) + # Note: machine_state needs to be adjusted after `set_published` is called. + instance.machine_state = final_machine_state + instance.save() + update_task_patcher.stop() + + if not is_published: + assert not set_doi and final_machine_state != 'accepted' + else: + if set_doi: + create_task_patcher = mock.patch('website.identifiers.utils.request_identifiers') + mock_create_identifier = create_task_patcher.start() + mock_create_identifier.side_effect = sync_set_identifiers(instance) + create_task_patcher.stop() + instance.is_published = True + guid_obj.referent = instance + guid_obj.object_id = instance.pk + guid_obj.save() + + instance.save() + return instance + class TagFactory(DjangoModelFactory): class Meta: model = models.Tag diff --git a/osf_tests/management_commands/fixtures/crossref_works_response.json b/osf_tests/management_commands/fixtures/crossref_works_response.json index 06ac5e57c33..a2a243904b5 100644 --- a/osf_tests/management_commands/fixtures/crossref_works_response.json +++ b/osf_tests/management_commands/fixtures/crossref_works_response.json @@ -50,7 +50,7 @@ "crossmark-restriction": false }, "abstract": "

Mock Abstract

", - "DOI": "10.31236/FK2osf.io/guid0", + "DOI": "10.31236/FK2osf.io/guid0_v1", "type": "posted-content", "created": { "date-parts": [ @@ -105,7 +105,7 @@ ] }, "references-count": 0, - "URL": "http://dx.doi.org/10.31236/osf.io/guid0", + "URL": "http://dx.doi.org/10.31236/osf.io/guid0_v1", "subtype": "preprint" }], "items-per-page": 20, diff --git a/osf_tests/management_commands/test_check_crossref_dois.py b/osf_tests/management_commands/test_check_crossref_dois.py index 1722cad3892..c4e37d9c389 100644 --- a/osf_tests/management_commands/test_check_crossref_dois.py +++ b/osf_tests/management_commands/test_check_crossref_dois.py @@ -22,7 +22,7 @@ def preprint(self): @pytest.fixture() def stuck_preprint(self): - preprint = PreprintFactory(set_doi=False) + preprint = PreprintFactory(set_doi=False, set_guid='guid0') preprint.date_published = preprint.date_published - timedelta(days=settings.DAYS_CROSSREF_DOIS_MUST_BE_STUCK_BEFORE_EMAIL + 1) # match guid to the fixture crossref_works_response.json guid = preprint.guids.first() @@ -43,7 +43,7 @@ def crossref_response(self): @responses.activate @mock.patch('osf.models.preprint.update_or_enqueue_on_preprint_updated', mock.Mock()) def test_check_crossref_dois(self, crossref_response, stuck_preprint, preprint): - doi = settings.DOI_FORMAT.format(prefix=stuck_preprint.provider.doi_prefix, guid=stuck_preprint.guids.first()._id) + doi = settings.DOI_FORMAT.format(prefix=stuck_preprint.provider.doi_prefix, guid=stuck_preprint._id) responses.add( responses.Response( responses.GET, diff --git a/osf_tests/metadata/expected_metadata_files/preprint_basic.datacite.json b/osf_tests/metadata/expected_metadata_files/preprint_basic.datacite.json index c6d4527b189..96e1b51c490 100644 --- a/osf_tests/metadata/expected_metadata_files/preprint_basic.datacite.json +++ b/osf_tests/metadata/expected_metadata_files/preprint_basic.datacite.json @@ -48,7 +48,7 @@ ], "fundingReferences": [], "identifier": { - "identifier": "11.pp/FK2osf.io/w4ibb", + "identifier": "11.pp/FK2osf.io/w4ibb_v1", "identifierType": "DOI" }, "publicationYear": "2123", diff --git a/osf_tests/metadata/expected_metadata_files/preprint_basic.datacite.xml b/osf_tests/metadata/expected_metadata_files/preprint_basic.datacite.xml index 762cc061c53..57d9d39eb1f 100644 --- a/osf_tests/metadata/expected_metadata_files/preprint_basic.datacite.xml +++ b/osf_tests/metadata/expected_metadata_files/preprint_basic.datacite.xml @@ -1,6 +1,6 @@ - 11.pp/FK2osf.io/w4ibb + 11.pp/FK2osf.io/w4ibb_v1 Person McNamington diff --git a/osf_tests/metadata/expected_metadata_files/preprint_basic.turtle b/osf_tests/metadata/expected_metadata_files/preprint_basic.turtle index ee7e866827b..d630032f71b 100644 --- a/osf_tests/metadata/expected_metadata_files/preprint_basic.turtle +++ b/osf_tests/metadata/expected_metadata_files/preprint_basic.turtle @@ -13,7 +13,7 @@ dcterms:description "this is a preprint description!" ; dcterms:hasVersion ; dcterms:identifier "http://localhost:5000/w4ibb", - "https://doi.org/11.pp/FK2osf.io/w4ibb" ; + "https://doi.org/11.pp/FK2osf.io/w4ibb_v1" ; dcterms:modified "2123-05-04" ; dcterms:publisher ; dcterms:subject , @@ -22,7 +22,7 @@ ; dcterms:title "this is a preprint title!" ; dcterms:type ; - owl:sameAs ; + owl:sameAs ; dcat:accessService ; osf:hostingInstitution ; osf:isSupplementedBy ; @@ -94,4 +94,3 @@ a skos:Concept ; skos:inScheme ; skos:prefLabel "wibbble" . - diff --git a/osf_tests/metadata/expected_metadata_files/preprint_full.datacite.json b/osf_tests/metadata/expected_metadata_files/preprint_full.datacite.json index 7858cc20bd9..5d9516b2b7a 100644 --- a/osf_tests/metadata/expected_metadata_files/preprint_full.datacite.json +++ b/osf_tests/metadata/expected_metadata_files/preprint_full.datacite.json @@ -48,7 +48,7 @@ ], "fundingReferences": [], "identifier": { - "identifier": "11.pp/FK2osf.io/w4ibb", + "identifier": "11.pp/FK2osf.io/w4ibb_v1", "identifierType": "DOI" }, "publicationYear": "2123", diff --git a/osf_tests/metadata/expected_metadata_files/preprint_full.datacite.xml b/osf_tests/metadata/expected_metadata_files/preprint_full.datacite.xml index 5803ee9f2f0..94739e3b322 100644 --- a/osf_tests/metadata/expected_metadata_files/preprint_full.datacite.xml +++ b/osf_tests/metadata/expected_metadata_files/preprint_full.datacite.xml @@ -1,6 +1,6 @@ - 11.pp/FK2osf.io/w4ibb + 11.pp/FK2osf.io/w4ibb_v1 Person McNamington diff --git a/osf_tests/metadata/expected_metadata_files/preprint_full.turtle b/osf_tests/metadata/expected_metadata_files/preprint_full.turtle index cdf665fd5fe..56486195974 100644 --- a/osf_tests/metadata/expected_metadata_files/preprint_full.turtle +++ b/osf_tests/metadata/expected_metadata_files/preprint_full.turtle @@ -13,7 +13,7 @@ dcterms:description "this is a preprint description!" ; dcterms:hasVersion ; dcterms:identifier "http://localhost:5000/w4ibb", - "https://doi.org/11.pp/FK2osf.io/w4ibb" ; + "https://doi.org/11.pp/FK2osf.io/w4ibb_v1" ; dcterms:modified "2123-05-04" ; dcterms:publisher ; dcterms:subject , @@ -22,7 +22,7 @@ ; dcterms:title "this is a preprint title!" ; dcterms:type ; - owl:sameAs ; + owl:sameAs ; dcat:accessService ; osf:hostingInstitution ; osf:isSupplementedBy ; @@ -124,4 +124,3 @@ a skos:Concept ; skos:inScheme ; skos:prefLabel "wibbble" . - diff --git a/osf_tests/metadata/expected_metadata_files/project_basic.datacite.json b/osf_tests/metadata/expected_metadata_files/project_basic.datacite.json index 0de527a5356..1f85e773f2f 100644 --- a/osf_tests/metadata/expected_metadata_files/project_basic.datacite.json +++ b/osf_tests/metadata/expected_metadata_files/project_basic.datacite.json @@ -60,7 +60,7 @@ "relationType": "HasVersion" }, { - "relatedIdentifier": "11.pp/FK2osf.io/w4ibb", + "relatedIdentifier": "11.pp/FK2osf.io/w4ibb_v1", "relatedIdentifierType": "DOI", "relationType": "IsSupplementTo" } @@ -85,7 +85,7 @@ "publicationYear": "2123", "publisher": "PP the Preprint Provider", "relatedItemIdentifier": { - "relatedItemIdentifier": "11.pp/FK2osf.io/w4ibb", + "relatedItemIdentifier": "11.pp/FK2osf.io/w4ibb_v1", "relatedItemIdentifierType": "DOI" }, "relatedItemType": "Preprint", diff --git a/osf_tests/metadata/expected_metadata_files/project_basic.datacite.xml b/osf_tests/metadata/expected_metadata_files/project_basic.datacite.xml index e1f3e6394c0..8b4abfb5d87 100644 --- a/osf_tests/metadata/expected_metadata_files/project_basic.datacite.xml +++ b/osf_tests/metadata/expected_metadata_files/project_basic.datacite.xml @@ -37,7 +37,7 @@ http://localhost:5000/w5ibb - 11.pp/FK2osf.io/w4ibb + 11.pp/FK2osf.io/w4ibb_v1 @@ -49,7 +49,7 @@ RegiProvi the Registration Provider - 11.pp/FK2osf.io/w4ibb + 11.pp/FK2osf.io/w4ibb_v1 this is a preprint title! diff --git a/osf_tests/metadata/expected_metadata_files/project_basic.turtle b/osf_tests/metadata/expected_metadata_files/project_basic.turtle index aa8244da1fd..832dd6170c0 100644 --- a/osf_tests/metadata/expected_metadata_files/project_basic.turtle +++ b/osf_tests/metadata/expected_metadata_files/project_basic.turtle @@ -33,11 +33,11 @@ dcterms:created "2123-05-04" ; dcterms:creator ; dcterms:identifier "http://localhost:5000/w4ibb", - "https://doi.org/11.pp/FK2osf.io/w4ibb" ; + "https://doi.org/11.pp/FK2osf.io/w4ibb_v1" ; dcterms:publisher ; dcterms:title "this is a preprint title!" ; dcterms:type ; - owl:sameAs . + owl:sameAs . a osf:Registration ; dcterms:created "2123-05-04" ; diff --git a/osf_tests/metadata/expected_metadata_files/project_full.datacite.json b/osf_tests/metadata/expected_metadata_files/project_full.datacite.json index 463d8af4d88..4ead1090105 100644 --- a/osf_tests/metadata/expected_metadata_files/project_full.datacite.json +++ b/osf_tests/metadata/expected_metadata_files/project_full.datacite.json @@ -94,7 +94,7 @@ "relationType": "HasVersion" }, { - "relatedIdentifier": "11.pp/FK2osf.io/w4ibb", + "relatedIdentifier": "11.pp/FK2osf.io/w4ibb_v1", "relatedIdentifierType": "DOI", "relationType": "IsSupplementTo" } @@ -119,7 +119,7 @@ "publicationYear": "2123", "publisher": "PP the Preprint Provider", "relatedItemIdentifier": { - "relatedItemIdentifier": "11.pp/FK2osf.io/w4ibb", + "relatedItemIdentifier": "11.pp/FK2osf.io/w4ibb_v1", "relatedItemIdentifierType": "DOI" }, "relatedItemType": "Preprint", diff --git a/osf_tests/metadata/expected_metadata_files/project_full.datacite.xml b/osf_tests/metadata/expected_metadata_files/project_full.datacite.xml index eff81de6099..e8649704d02 100644 --- a/osf_tests/metadata/expected_metadata_files/project_full.datacite.xml +++ b/osf_tests/metadata/expected_metadata_files/project_full.datacite.xml @@ -55,7 +55,7 @@ http://localhost:5000/w5ibb - 11.pp/FK2osf.io/w4ibb + 11.pp/FK2osf.io/w4ibb_v1 @@ -67,7 +67,7 @@ RegiProvi the Registration Provider - 11.pp/FK2osf.io/w4ibb + 11.pp/FK2osf.io/w4ibb_v1 this is a preprint title! diff --git a/osf_tests/metadata/expected_metadata_files/project_full.turtle b/osf_tests/metadata/expected_metadata_files/project_full.turtle index 63946b2f80b..3fbc31f7ba0 100644 --- a/osf_tests/metadata/expected_metadata_files/project_full.turtle +++ b/osf_tests/metadata/expected_metadata_files/project_full.turtle @@ -39,11 +39,11 @@ dcterms:created "2123-05-04" ; dcterms:creator ; dcterms:identifier "http://localhost:5000/w4ibb", - "https://doi.org/11.pp/FK2osf.io/w4ibb" ; + "https://doi.org/11.pp/FK2osf.io/w4ibb_v1" ; dcterms:publisher ; dcterms:title "this is a preprint title!" ; dcterms:type ; - owl:sameAs . + owl:sameAs . a osf:Registration ; dcterms:created "2123-05-04" ; diff --git a/osf_tests/metadata/test_osf_gathering.py b/osf_tests/metadata/test_osf_gathering.py index 4c064c8a690..4c95711c615 100644 --- a/osf_tests/metadata/test_osf_gathering.py +++ b/osf_tests/metadata/test_osf_gathering.py @@ -124,7 +124,7 @@ def test_setupdata(self): assert self.registrationfocus.iri == OSFIO[self.registration._id] assert self.registrationfocus.rdftype == OSF.Registration assert self.registrationfocus.dbmodel is self.registration - assert self.preprintfocus.iri == OSFIO[self.preprint._id] + assert self.preprintfocus.iri == OSFIO[self.preprint.get_guid()._id] assert self.preprintfocus.rdftype == OSF.Preprint assert self.preprintfocus.dbmodel is self.preprint assert self.filefocus.iri == OSFIO[self.file.get_guid()._id] diff --git a/osf_tests/metadata/test_serialized_metadata.py b/osf_tests/metadata/test_serialized_metadata.py index c8a0eee95ac..9300f20f778 100644 --- a/osf_tests/metadata/test_serialized_metadata.py +++ b/osf_tests/metadata/test_serialized_metadata.py @@ -250,6 +250,7 @@ def setUp(self): subjects=[ [parent_subject._id, child_subject._id], ], + use_guid=osfguid_sequence.get_or_create(id=-1)[0] ) self.registration = factories.RegistrationFactory( is_public=True, diff --git a/osf_tests/metrics/reporters/test_public_item_usage_reporter.py b/osf_tests/metrics/reporters/test_public_item_usage_reporter.py index b75c420b1a2..69bd266285a 100644 --- a/osf_tests/metrics/reporters/test_public_item_usage_reporter.py +++ b/osf_tests/metrics/reporters/test_public_item_usage_reporter.py @@ -27,8 +27,7 @@ def _patch_settings(self): @pytest.fixture def item0(self): - _item0 = factories.PreprintFactory(is_public=True) - _item0._id = 'item0' + _item0 = factories.PreprintFactory(is_public=True, set_guid='item0') return _item0 @pytest.fixture @@ -186,7 +185,7 @@ def test_reporter(self, ym_empty, ym_sparse, ym_busy, sparse_month_usage, busy_m _sparse_item0, _sparse_item1, _sparse_item2 = sorted(_sparse, key=attrgetter('item_osfid')) # sparse-month item0 assert isinstance(_sparse_item0, PublicItemUsageReport) - assert _sparse_item0.item_osfid == 'item0' + assert _sparse_item0.item_osfid == 'item0_v1' assert _sparse_item0.provider_id == [item0.provider._id] assert _sparse_item0.platform_iri == ['http://osf.example'] assert _sparse_item0.view_count == 3 @@ -217,7 +216,7 @@ def test_reporter(self, ym_empty, ym_sparse, ym_busy, sparse_month_usage, busy_m _busy_item0, _busy_item1, _busy_item2 = sorted(_busy, key=attrgetter('item_osfid')) # busy-month item0 assert isinstance(_busy_item0, PublicItemUsageReport) - assert _busy_item0.item_osfid == 'item0' + assert _busy_item0.item_osfid == 'item0_v1' assert _busy_item0.provider_id == [item0.provider._id] assert _busy_item0.platform_iri == ['http://osf.example'] assert _busy_item0.view_count == 4 * 7 diff --git a/osf_tests/test_elastic_search.py b/osf_tests/test_elastic_search.py index b2815fc169f..56c42391095 100644 --- a/osf_tests/test_elastic_search.py +++ b/osf_tests/test_elastic_search.py @@ -524,7 +524,7 @@ def setUp(self): search.delete_index(elastic_search.INDEX) search.create_index(elastic_search.INDEX) self.user = factories.UserFactory(fullname='John Deacon') - self.preprint = Preprint( + self.preprint = Preprint.create( title='Red Special', description='We are the champions', creator=self.user, diff --git a/osf_tests/test_generate_sitemap.py b/osf_tests/test_generate_sitemap.py index 37dfd17a5e8..673183f75ae 100644 --- a/osf_tests/test_generate_sitemap.py +++ b/osf_tests/test_generate_sitemap.py @@ -98,6 +98,15 @@ def preprint_osf(self, project_preprint_osf, user_admin_project_public, provider return PreprintFactory(project=project_preprint_osf, creator=user_admin_project_public, provider=provider_osf) + @pytest.fixture(autouse=True) + def preprint_osf_blank(self, project_preprint_osf, user_admin_project_public, provider_osf): + return PreprintFactory(project=project_preprint_osf, + creator=user_admin_project_public, + provider=provider_osf) + + @pytest.fixture(autouse=True) + def preprint_osf_version(self, preprint_osf_blank): + return PreprintFactory.create_version(create_from=preprint_osf_blank, creator=preprint_osf_blank.creator) @pytest.fixture(autouse=True) def preprint_withdrawn(self, project_preprint_osf, user_admin_project_public, provider_osf): @@ -118,7 +127,7 @@ def preprint_other(self, project_preprint_other, user_admin_project_public, prov def all_included_links(self, user_admin_project_public, user_admin_project_private, project_registration_public, project_preprint_osf, project_preprint_other, registration_active, provider_other, provider_osf, - preprint_osf, preprint_other, preprint_withdrawn): + preprint_osf, preprint_osf_version, preprint_other, preprint_withdrawn): # Return urls of all fixtures urls_to_include = [item['loc'] for item in settings.SITEMAP_STATIC_URLS] urls_to_include.extend([ @@ -129,9 +138,11 @@ def all_included_links(self, user_admin_project_public, user_admin_project_priva project_preprint_other.url, registration_active.url, f'/preprints/{provider_osf._id}/{preprint_osf._id}', + f'/preprints/{provider_osf._id}/{preprint_osf_version._id}', f'/preprints/{provider_other._id}/{preprint_other._id}', f'/preprints/{provider_osf._id}/{preprint_withdrawn._id}', f'/{preprint_osf._id}/download/?format=pdf', + f'/{preprint_osf_version._id}/download/?format=pdf', f'/{preprint_other._id}/download/?format=pdf' ]) urls_to_include = [urljoin(settings.DOMAIN, item) for item in urls_to_include] diff --git a/osf_tests/test_guid.py b/osf_tests/test_guid.py index e4fb28cfa6f..af4ab7c139d 100644 --- a/osf_tests/test_guid.py +++ b/osf_tests/test_guid.py @@ -1,17 +1,28 @@ from unittest import mock -import pytest from urllib.parse import quote + from django.utils import timezone from django.core.exceptions import MultipleObjectsReturned +import pytest -from osf.models import Guid, NodeLicenseRecord, OSFUser -from osf_tests.factories import AuthUserFactory, UserFactory, NodeFactory, NodeLicenseRecordFactory, \ - RegistrationFactory, PreprintFactory, PreprintProviderFactory +from framework.auth import Auth +from osf.models import Guid, GuidVersionsThrough, NodeLicenseRecord, OSFUser, Preprint +from osf.models.base import VersionedGuidMixin from osf.utils.permissions import ADMIN +from osf_tests.factories import ( + AuthUserFactory, + NodeFactory, + NodeLicenseRecordFactory, + PreprintFactory, + PreprintProviderFactory, + RegistrationFactory, + UserFactory, +) from tests.base import OsfTestCase from tests.test_websitefiles import TestFile from website.settings import MFR_SERVER_URL, WATERBUTLER_URL + @pytest.mark.django_db class TestGuid: @@ -39,6 +50,7 @@ def test_short_guid_gets_generated_on_creation(self, Factory): assert obj._id assert len(obj._id) == 5 + @pytest.mark.django_db class TestReferent: @@ -433,7 +445,6 @@ def test_resolve_guid_download_errors(self): assert res.status_code == 404 pp = PreprintFactory(is_published=False) - res = self.app.get(pp.url + 'download') assert res.status_code == 404 @@ -452,3 +463,109 @@ def test_resolve_guid_download_errors(self): res = self.app.get(pp.url + 'download', auth=non_contrib.auth) assert res.status_code == 410 + + +@pytest.fixture() +def creator(): + return AuthUserFactory() + +@pytest.fixture() +def auth(creator): + return Auth(user=creator) + +@pytest.fixture() +def preprint_provider(): + return PreprintProviderFactory() + + +@pytest.mark.django_db +class TestGuidVersionsThrough: + def test_creation_versioned_guid(self, creator, preprint_provider): + preprint = Preprint.create( + creator=creator, + provider=preprint_provider, + title='Preprint', + description='Abstract' + ) + assert preprint.guids.count() == 1 + assert preprint.creator == creator + assert preprint.provider == preprint_provider + assert preprint.title == 'Preprint' + assert preprint.description == 'Abstract' + + preprint_guid = preprint.guids.first() + assert preprint_guid.referent == preprint + assert preprint_guid.content_type.model == 'preprint' + assert preprint_guid.object_id == preprint.pk + assert preprint_guid.is_versioned is True + + version_entry = preprint.versioned_guids.first() + assert version_entry.guid == preprint_guid + assert version_entry.referent == preprint + assert version_entry.content_type.model == 'preprint' + assert version_entry.object_id == preprint.pk + assert version_entry.version == 1 + + def test_create_version(self, creator, preprint_provider): + preprint = PreprintFactory(creator=creator) + assert preprint.guids.count() == 1 + preprint_guid = preprint.guids.first() + + preprint_metadata = { + 'subjects': [el for el in preprint.subjects.all().values_list('_id', flat=True)], + 'original_publication_date': preprint.original_publication_date, + 'custom_publication_citation': preprint.custom_publication_citation, + 'article_doi': preprint.article_doi, + 'has_coi': preprint.has_coi, + 'conflict_of_interest_statement': preprint.conflict_of_interest_statement, + 'has_data_links': preprint.has_data_links, + 'why_no_data': preprint.why_no_data, + 'data_links': preprint.data_links, + 'has_prereg_links': preprint.has_prereg_links, + 'why_no_prereg': preprint.why_no_prereg, + 'prereg_links': preprint.prereg_links, + } + if preprint.node: + preprint_metadata['node'] = preprint.node + if preprint.license: + preprint_metadata['license_type'] = preprint.license.node_license + preprint_metadata['license'] = { + 'copyright_holders': preprint.license.copyright_holders, + 'year': preprint.license.year + } + auth = Auth(user=creator) + new_preprint, data_for_update = Preprint.create_version(create_from_guid=preprint._id, auth=auth) + tags = data_for_update.pop('tags') + assert list(tags) == list(preprint.tags.all().values_list('name', flat=True)) + assert preprint_metadata == data_for_update + + new_version = new_preprint.versioned_guids.first() + + assert preprint.guids.count() == 0 + assert preprint.versioned_guids.count() == 1 + assert preprint.files.count() == 1 + assert new_preprint.guids.count() == 1 + assert new_preprint.versioned_guids.count() == 1 + assert new_preprint.files.count() == 0 + + assert new_version.referent == new_preprint + assert new_version.object_id == new_preprint.pk + assert new_version.content_type.model == 'preprint' + assert new_version.guid == preprint_guid + assert new_version.version == 2 + assert preprint_guid.versions.count() == 2 + + def test_versioned_preprint_id_property(self, creator, preprint_provider): + preprint = Preprint.create( + creator=creator, + provider=preprint_provider, + title='Preprint', + description='Abstract' + ) + preprint_guid = preprint.guids.first() + expected_guid = f'{preprint_guid._id}{VersionedGuidMixin.GUID_VERSION_DELIMITER}{VersionedGuidMixin.INITIAL_VERSION_NUMBER}' + assert preprint._id == expected_guid + + GuidVersionsThrough.objects.filter(guid=preprint_guid).delete() + preprint._id = None + assert preprint._id is None diff --git a/osf_tests/test_preprint_factory.py b/osf_tests/test_preprint_factory.py new file mode 100644 index 00000000000..591cd2550a9 --- /dev/null +++ b/osf_tests/test_preprint_factory.py @@ -0,0 +1,165 @@ +import pytest +from faker import Faker +from osf_tests.factories import PreprintFactory, AuthUserFactory, PreprintProviderFactory +from osf.models import Preprint +from framework.auth import Auth + +fake = Faker() + + +@pytest.fixture() +def creator(): + return AuthUserFactory() + + +@pytest.fixture() +def auth(creator): + return Auth(user=creator) + + +@pytest.fixture() +def preprint_provider(): + return PreprintProviderFactory() + + +@pytest.fixture +def factory_preprint(): + return PreprintFactory() + + +@pytest.fixture +def model_preprint(creator, preprint_provider): + return Preprint.create( + creator=creator, + provider=preprint_provider, + title='Preprint', + description='Abstract' + ) + + +@pytest.mark.django_db +class TestPreprintFactory: + def test_factory_creates_valid_instance(self): + preprint = PreprintFactory() + assert preprint.title is not None + assert preprint.description is not None + assert preprint.creator is not None + assert preprint.provider is not None + + def test_factory_build_method(self): + preprint = PreprintFactory._build(Preprint) + assert isinstance(preprint, Preprint) + assert preprint.pk is None + + def test_factory_create_method(self): + preprint = PreprintFactory() + assert isinstance(preprint, Preprint) + assert preprint.pk is not None + + def test_factory_custom_fields(self): + custom_title = 'Custom Preprint Title' + custom_description = 'Custom Description' + preprint = PreprintFactory(title=custom_title, description=custom_description) + assert preprint.title == custom_title + assert preprint.description == custom_description + + def test_factory_affiliated_institutions(self): + preprint = PreprintFactory() + assert preprint.affiliated_institutions.count() == 0 + + def test_factory_published_state(self): + preprint = PreprintFactory(is_published=True) + assert preprint.is_published + assert preprint.date_published is not None + + def test_factory_unpublished_state(self): + preprint = PreprintFactory(is_published=False) + assert not preprint.is_published + assert preprint.date_published is None + + def test_factory_creates_guid_and_version(self): + preprint = PreprintFactory() + guid = preprint.guids.first() + guid_version = guid.versions.first() + + assert guid is not None + assert guid.referent == preprint + + assert guid_version is not None + assert guid_version.referent == preprint + assert guid_version.guid == guid + assert guid_version.version == 1 + + def test_create_version_increments_version_number(self): + original_preprint = PreprintFactory() + original_guid = original_preprint.guids.first() + assert original_guid is not None + assert original_guid.versions.count() == 1 + + new_preprint = PreprintFactory.create_version(create_from=original_preprint) + assert new_preprint is not None + assert original_guid.versions.count() == 2 + + versions = original_guid.versions.order_by('version') + assert versions[0].version == 1 # Original version + assert versions[1].version == 2 # New version + + def test_create_version_copies_fields(self): + title = 'Original Preprint Title' + description = 'Original description.' + original_preprint = PreprintFactory(title=title, description=description) + + new_preprint = PreprintFactory.create_version(create_from=original_preprint) + + assert new_preprint.title == title + assert new_preprint.description == description + assert new_preprint.provider == original_preprint.provider + assert new_preprint.creator == original_preprint.creator + + def test_create_version_copies_subjects(self): + original_preprint = PreprintFactory() + original_subjects = [[subject._id] for subject in original_preprint.subjects.all()] + + new_preprint = PreprintFactory.create_version(create_from=original_preprint) + + new_subjects = [[subject._id] for subject in new_preprint.subjects.all()] + assert original_subjects == new_subjects + + def test_create_version_copies_contributors(self): + original_preprint = PreprintFactory() + contributors_before = list( + original_preprint.contributor_set.exclude(user=original_preprint.creator).values_list('user_id', flat=True) + ) + + new_preprint = PreprintFactory.create_version(create_from=original_preprint) + + contributors_after = list( + new_preprint.contributor_set.exclude(user=new_preprint.creator).values_list('user_id', flat=True) + ) + assert contributors_before == contributors_after + + def test_create_version_with_machine_state(self): + original_preprint = PreprintFactory() + new_preprint = PreprintFactory.create_version( + create_from=original_preprint, final_machine_state='accepted' + ) + + assert new_preprint.machine_state == 'accepted' + + def test_create_version_published_flag(self): + original_preprint = PreprintFactory(is_published=True) + original_guid = original_preprint.guids.first() + new_preprint = PreprintFactory.create_version( + create_from=original_preprint, is_published=True + ) + original_guid.refresh_from_db() + assert new_preprint.is_published is True + assert original_guid.referent == new_preprint + + def test_create_version_unpublished(self): + original_preprint = PreprintFactory(is_published=True) + new_preprint = PreprintFactory.create_version( + create_from=original_preprint, is_published=False, set_doi=False, final_machine_state='pending' + ) + assert new_preprint.is_published is False + assert new_preprint.machine_state == 'pending' diff --git a/scripts/generate_sitemap.py b/scripts/generate_sitemap.py index 8cce0a40a29..c0b38739789 100644 --- a/scripts/generate_sitemap.py +++ b/scripts/generate_sitemap.py @@ -9,13 +9,15 @@ import xml import django +from django.contrib.sitemaps.views import sitemap + django.setup() import logging import tempfile from framework import sentry from framework.celery_tasks import app as celery_app -from django.db.models import Q +from django.db.models import Q, F, OuterRef, Subquery from osf.models import OSFUser, AbstractNode, Preprint, PreprintProvider from osf.utils.workflows import DefaultStates from scripts import utils as script_utils @@ -196,12 +198,20 @@ def generate(self): self.log_errors('NODE', obj['guids___id'], e) progress.increment() progress.stop() + #Removed previous logic as it blocked withdrawn preprints to get in sitemap generator + objs = Preprint.objects.filter(date_published__isnull=False).annotate( + most_recent_non_withdrawn=Subquery( + Preprint.objects.filter( + guids=OuterRef('guids') + ).order_by('-versioned_guids__version').values('versioned_guids__version')[:1] + ) + ).order_by( + 'versioned_guids__guid_id', + '-is_published', + '-versioned_guids__version' + ).distinct('versioned_guids__guid_id') - # Preprint urls - - objs = (Preprint.objects.can_view() - .select_related('node', 'provider', 'primary_file')) - progress.start(objs.count() * 2, 'PREP: ') + progress.start(len(objs) * 2, 'PREP: ') for obj in objs: try: preprint_date = obj.modified.strftime('%Y-%m-%d') diff --git a/tests/identifiers/test_crossref.py b/tests/identifiers/test_crossref.py index a7164b569d5..719566d3571 100644 --- a/tests/identifiers/test_crossref.py +++ b/tests/identifiers/test_crossref.py @@ -40,6 +40,11 @@ def preprint(): preprint.license.node_license.url = 'https://creativecommons.org/licenses/by/4.0/legalcode' return preprint +@pytest.fixture() +def preprint_version(preprint): + versioned_preprint = PreprintFactory.create_version(preprint) + return versioned_preprint + @pytest.fixture() def crossref_success_response(): return b""" @@ -88,6 +93,11 @@ def test_crossref_build_doi(self, crossref_client, preprint): assert crossref_client.build_doi(preprint) == settings.DOI_FORMAT.format(prefix=doi_prefix, guid=preprint._id) + def test_crossref_build_doi_versioned(self, crossref_client, preprint_version): + doi_prefix = preprint_version.provider.doi_prefix + + assert crossref_client.build_doi(preprint_version) == settings.DOI_FORMAT.format(prefix=doi_prefix, guid=preprint_version._id) + def test_crossref_build_metadata(self, crossref_client, preprint): test_email = 'test-email@osf.io' with mock.patch('website.settings.CROSSREF_DEPOSITOR_EMAIL', test_email): @@ -113,11 +123,56 @@ def test_crossref_build_metadata(self, crossref_client, preprint): assert root.find('.//{%s}intra_work_relation' % crossref.CROSSREF_RELATIONS).text == preprint.article_doi assert root.find('.//{%s}doi' % crossref.CROSSREF_NAMESPACE).text == settings.DOI_FORMAT.format(prefix=preprint.provider.doi_prefix, guid=preprint._id) assert root.find('.//{%s}resource' % crossref.CROSSREF_NAMESPACE).text == settings.DOMAIN + preprint._id + assert root.find('.//{%s}doi_relations' % crossref.CROSSREF_NAMESPACE) is None metadata_date_parts = [elem.text for elem in root.find('.//{%s}posted_date' % crossref.CROSSREF_NAMESPACE)] preprint_date_parts = preprint.date_published.strftime('%Y-%m-%d').split('-') assert set(metadata_date_parts) == set(preprint_date_parts) + def test_crossref_build_metadata_versioned(self, crossref_client, preprint_version, preprint): + test_email = 'test-email@osf.io' + with mock.patch('website.settings.CROSSREF_DEPOSITOR_EMAIL', test_email): + crossref_xml = crossref_client.build_metadata(preprint_version) + root = lxml.etree.fromstring(crossref_xml) + + # header + assert root.find('.//{%s}doi_batch_id' % crossref.CROSSREF_NAMESPACE).text == preprint_version._id + assert root.find('.//{%s}depositor_name' % crossref.CROSSREF_NAMESPACE).text == crossref.CROSSREF_DEPOSITOR_NAME + assert root.find('.//{%s}email_address' % crossref.CROSSREF_NAMESPACE).text == test_email + + # body + contributors = root.find('.//{%s}contributors' % crossref.CROSSREF_NAMESPACE) + assert len(contributors.getchildren()) == len(preprint_version.visible_contributors) + + assert root.find('.//{%s}group_title' % crossref.CROSSREF_NAMESPACE).text == preprint_version.provider.name + assert root.find('.//{%s}title' % crossref.CROSSREF_NAMESPACE).text == preprint_version.title + assert root.find('.//{%s}item_number' % crossref.CROSSREF_NAMESPACE).text == f'osf.io/{preprint_version._id}' + assert root.find('.//{%s}abstract/' % crossref.JATS_NAMESPACE).text == preprint_version.description + assert root.find( + './/{%s}license_ref' % crossref.CROSSREF_ACCESS_INDICATORS).text == 'https://creativecommons.org/licenses/by/4.0/legalcode' + assert root.find('.//{%s}license_ref' % crossref.CROSSREF_ACCESS_INDICATORS).get( + 'start_date') == preprint_version.date_published.strftime('%Y-%m-%d') + + doi_relations = root.find('.//{%s}doi_relations' % crossref.CROSSREF_NAMESPACE) + assert doi_relations is not None + + doi = doi_relations.find('.//{%s}doi' % crossref.CROSSREF_NAMESPACE) + assert doi is not None + assert doi.text == settings.DOI_FORMAT.format(prefix=preprint_version.provider.doi_prefix, guid=preprint_version._id) + + related_item = doi_relations.find('.//{%s}related_item' % crossref.CROSSREF_RELATIONS) + assert related_item is not None + + description = related_item.find('.//{%s}description' % crossref.CROSSREF_RELATIONS) + assert description is not None + assert description.text == 'Updated version' + + intra_work_relation = related_item.find('.//{%s}intra_work_relation' % crossref.CROSSREF_RELATIONS) + assert intra_work_relation is not None + assert intra_work_relation.get('relationship-type') == 'isVersionOf' + assert intra_work_relation.get('identifier-type') == 'doi' + assert intra_work_relation.text == settings.DOI_FORMAT.format(prefix=preprint.provider.doi_prefix, guid=preprint._id) + @responses.activate @mock.patch('osf.models.preprint.update_or_enqueue_on_preprint_updated', mock.Mock()) def test_metadata_for_deleted_node(self, crossref_client, preprint): diff --git a/tests/test_preprints.py b/tests/test_preprints.py index 22d4167d764..522ff146718 100644 --- a/tests/test_preprints.py +++ b/tests/test_preprints.py @@ -1,46 +1,30 @@ -import jwe -import jwt -from unittest import mock -from furl import furl -import pytest -import time -from urllib.parse import urljoin import datetime -from django.utils import timezone -import itsdangerous from importlib import import_module -import pytest_socket +import time +from urllib.parse import urljoin from django.contrib.auth.models import Group -from django.core.exceptions import ValidationError +from django.contrib.contenttypes.models import ContentType from django.conf import settings as django_conf_settings +from django.core.exceptions import ValidationError +from django.utils import timezone +from furl import furl +import itsdangerous +import jwe +import jwt +import pytest +import pytest_socket +from unittest import mock -from website import settings, mails -from website.preprints.tasks import ( - on_preprint_updated, - update_or_create_preprint_identifiers, - update_or_enqueue_on_preprint_updated, - should_update_preprint_identifiers -) -from website.identifiers.clients import CrossRefClient, ECSArXivCrossRefClient, crossref -from website.identifiers.utils import request_identifiers from framework.auth import signing from framework.celery_tasks import handlers from framework.postcommit_tasks.handlers import get_task_from_postcommit_queue, postcommit_celery_queue -from framework.exceptions import PermissionsError, HTTPError +from framework.exceptions import PermissionsError from framework.auth.core import Auth from addons.osfstorage.models import OsfStorageFile from addons.base import views -from osf.models import Tag, Preprint, PreprintLog, PreprintContributor, Subject +from osf.models import Tag, Preprint, PreprintLog, PreprintContributor from osf.exceptions import PreprintStateError, ValidationError, ValidationValueError - -from osf.utils.permissions import READ, WRITE, ADMIN -from osf.utils.workflows import DefaultStates, RequestTypes -from tests.base import assert_datetime_equal, OsfTestCase -from tests.utils import assert_preprint_logs - -SessionStore = import_module(django_conf_settings.SESSION_ENGINE).SessionStore - from osf_tests.factories import ( ProjectFactory, AuthUserFactory, @@ -53,9 +37,26 @@ PreprintRequestFactory, NodeFactory, ) +from osf.utils.permissions import READ, WRITE, ADMIN +from osf.utils.workflows import DefaultStates, RequestTypes, ReviewStates +from tests.base import assert_datetime_equal, OsfTestCase +from tests.utils import assert_preprint_logs +from website import settings, mails +from website.identifiers.clients import CrossRefClient, ECSArXivCrossRefClient, crossref +from website.identifiers.utils import request_identifiers +from website.preprints.tasks import ( + on_preprint_updated, + update_or_create_preprint_identifiers, + update_or_enqueue_on_preprint_updated, + should_update_preprint_identifiers +) + + +SessionStore = import_module(django_conf_settings.SESSION_ENGINE).SessionStore pytestmark = pytest.mark.django_db + @pytest.fixture() def user(): return UserFactory() @@ -281,9 +282,10 @@ class TestPreprintCreation: def test_creator_is_added_as_contributor(self, fake): user = UserFactory() - preprint = Preprint( + preprint = Preprint.create( title=fake.bs(), creator=user, + description=fake.bs(), provider=PreprintProviderFactory() ) preprint.save() @@ -296,9 +298,10 @@ def test_creator_is_added_as_contributor(self, fake): def test_default_region_set_to_user_settings_osfstorage_default(self, fake): user = UserFactory() - preprint = Preprint( + preprint = Preprint.create( title=fake.bs, creator=user, + description=fake.bs(), provider=PreprintProviderFactory() ) preprint.save() @@ -307,9 +310,10 @@ def test_default_region_set_to_user_settings_osfstorage_default(self, fake): def test_root_folder_created_automatically(self, fake): user = UserFactory() - preprint = Preprint( + preprint = Preprint.create( title=fake.bs, creator=user, + description=fake.bs(), provider=PreprintProviderFactory() ) preprint.save() @@ -2413,3 +2417,293 @@ def test_crossref_status_is_updated(self, make_withdrawal_request, preprint, pre assert preprint_pre_mod.is_retracted assert preprint_pre_mod.verified_publishable assert crossref_client.get_status(preprint_pre_mod) == 'unavailable' + + +@pytest.mark.django_db +class TestPreprintVersionWithModeration: + + @pytest.fixture() + def creator(self): + return AuthUserFactory() + + @pytest.fixture() + def moderator(self, preprint_pre_mod, preprint_post_mod): + provider_moderator = AuthUserFactory() + preprint_pre_mod.provider.add_to_group(provider_moderator, 'moderator') + preprint_pre_mod.provider.save() + preprint_post_mod.provider.add_to_group(provider_moderator, 'moderator') + preprint_post_mod.provider.save() + return provider_moderator + + @pytest.fixture() + def unpublished_preprint_pre_mod(self): + return PreprintFactory(reviews_workflow='pre-moderation', is_published=False) + + @pytest.fixture() + def preprint_pre_mod(self): + return PreprintFactory(reviews_workflow='pre-moderation') + + @pytest.fixture() + def unpublished_preprint_post_mod(self): + return PreprintFactory(reviews_workflow='post-moderation', is_published=False) + + @pytest.fixture() + def preprint_post_mod(self): + return PreprintFactory(reviews_workflow='post-moderation') + + @pytest.fixture() + def make_withdrawal_request(self, creator): + def withdrawal_request(target): + request = PreprintRequestFactory( + creator=creator, + target=target, + request_type=RequestTypes.WITHDRAWAL.value, + machine_state=DefaultStates.INITIAL.value) + request.run_submit(creator) + return request + return withdrawal_request + + def test_unpublished_preprint_pre_mod_accept(self, unpublished_preprint_pre_mod, creator, moderator): + assert unpublished_preprint_pre_mod.is_published is False + assert unpublished_preprint_pre_mod.machine_state == ReviewStates.INITIAL.value + + unpublished_preprint_pre_mod.run_submit(creator) + assert unpublished_preprint_pre_mod.is_published is False + assert unpublished_preprint_pre_mod.machine_state == ReviewStates.PENDING.value + guid_obj = unpublished_preprint_pre_mod.get_guid() + assert guid_obj.object_id == unpublished_preprint_pre_mod.pk + assert guid_obj.referent == unpublished_preprint_pre_mod + assert guid_obj.content_type == ContentType.objects.get_for_model(Preprint) + + unpublished_preprint_pre_mod.run_accept(moderator, 'comment') + assert unpublished_preprint_pre_mod.is_published is True + assert unpublished_preprint_pre_mod.machine_state == ReviewStates.ACCEPTED.value + guid_obj = unpublished_preprint_pre_mod.get_guid() + assert guid_obj.object_id == unpublished_preprint_pre_mod.pk + assert guid_obj.referent == unpublished_preprint_pre_mod + assert guid_obj.content_type == ContentType.objects.get_for_model(Preprint) + + def test_unpublished_preprint_pre_mod_reject(self, unpublished_preprint_pre_mod, creator, moderator): + assert unpublished_preprint_pre_mod.is_published is False + assert unpublished_preprint_pre_mod.machine_state == ReviewStates.INITIAL.value + + unpublished_preprint_pre_mod.run_submit(creator) + assert unpublished_preprint_pre_mod.is_published is False + assert unpublished_preprint_pre_mod.machine_state == ReviewStates.PENDING.value + guid_obj = unpublished_preprint_pre_mod.get_guid() + assert guid_obj.object_id == unpublished_preprint_pre_mod.pk + assert guid_obj.referent == unpublished_preprint_pre_mod + assert guid_obj.content_type == ContentType.objects.get_for_model(Preprint) + + unpublished_preprint_pre_mod.run_reject(moderator, 'comment') + assert unpublished_preprint_pre_mod.is_published is False + assert unpublished_preprint_pre_mod.machine_state == ReviewStates.REJECTED.value + assert unpublished_preprint_pre_mod.versioned_guids.first().is_rejected is True + assert guid_obj.object_id == unpublished_preprint_pre_mod.pk + assert guid_obj.referent == unpublished_preprint_pre_mod + assert guid_obj.content_type == ContentType.objects.get_for_model(Preprint) + + def test_unpublished_version_pre_mod_submit_and_accept(self, preprint_pre_mod, creator, moderator): + assert preprint_pre_mod.is_published is True + assert preprint_pre_mod.machine_state == ReviewStates.ACCEPTED.value + + new_version = PreprintFactory.create_version( + create_from=preprint_pre_mod, + creator=creator, + final_machine_state='initial', + is_published=False, + set_doi=False + ) + assert new_version.is_published is False + assert new_version.machine_state == ReviewStates.INITIAL.value + guid_obj = new_version.get_guid() + assert guid_obj.object_id == preprint_pre_mod.pk + assert guid_obj.referent == preprint_pre_mod + assert guid_obj.content_type == ContentType.objects.get_for_model(Preprint) + + new_version.run_submit(creator) + assert new_version.is_published is False + assert new_version.machine_state == ReviewStates.PENDING.value + guid_obj = new_version.get_guid() + assert guid_obj.object_id == preprint_pre_mod.pk + assert guid_obj.referent == preprint_pre_mod + assert guid_obj.content_type == ContentType.objects.get_for_model(Preprint) + + new_version.run_accept(moderator, 'comment') + assert new_version.is_published is True + assert new_version.machine_state == ReviewStates.ACCEPTED.value + guid_obj = new_version.get_guid() + assert guid_obj.object_id == new_version.pk + assert guid_obj.referent == new_version + assert guid_obj.content_type == ContentType.objects.get_for_model(Preprint) + + def test_new_version_pre_mod_submit_and_reject(self, preprint_pre_mod, creator, moderator): + assert preprint_pre_mod.is_published is True + assert preprint_pre_mod.machine_state == ReviewStates.ACCEPTED.value + + new_version = PreprintFactory.create_version( + create_from=preprint_pre_mod, + creator=creator, + final_machine_state='initial', + is_published=False, + set_doi=False + ) + assert new_version.is_published is False + assert new_version.machine_state == ReviewStates.INITIAL.value + guid_obj = new_version.get_guid() + assert guid_obj.object_id == preprint_pre_mod.pk + assert guid_obj.referent == preprint_pre_mod + assert guid_obj.content_type == ContentType.objects.get_for_model(Preprint) + + new_version.run_submit(creator) + assert new_version.is_published is False + assert new_version.machine_state == ReviewStates.PENDING.value + guid_obj = new_version.get_guid() + assert guid_obj.object_id == preprint_pre_mod.pk + assert guid_obj.referent == preprint_pre_mod + assert guid_obj.content_type == ContentType.objects.get_for_model(Preprint) + + new_version.run_reject(moderator, 'comment') + assert new_version.is_published is False + assert new_version.machine_state == ReviewStates.REJECTED.value + assert new_version.versioned_guids.first().is_rejected is True + guid_obj = new_version.get_guid() + assert guid_obj.object_id == preprint_pre_mod.pk + assert guid_obj.referent == preprint_pre_mod + + def test_unpublished_preprint_post_mod_submit_and_accept(self, unpublished_preprint_post_mod, creator, moderator): + assert unpublished_preprint_post_mod.is_published is False + assert unpublished_preprint_post_mod.machine_state == ReviewStates.INITIAL.value + + unpublished_preprint_post_mod.run_submit(creator) + assert unpublished_preprint_post_mod.is_published is True + assert unpublished_preprint_post_mod.machine_state == ReviewStates.PENDING.value + guid_obj = unpublished_preprint_post_mod.get_guid() + assert guid_obj.object_id == unpublished_preprint_post_mod.pk + assert guid_obj.referent == unpublished_preprint_post_mod + assert guid_obj.content_type == ContentType.objects.get_for_model(Preprint) + + unpublished_preprint_post_mod.run_accept(moderator, 'comment') + assert unpublished_preprint_post_mod.is_published is True + assert unpublished_preprint_post_mod.machine_state == ReviewStates.ACCEPTED.value + guid_obj = unpublished_preprint_post_mod.get_guid() + assert guid_obj.object_id == unpublished_preprint_post_mod.pk + assert guid_obj.referent == unpublished_preprint_post_mod + assert guid_obj.content_type == ContentType.objects.get_for_model(Preprint) + + def test_unpublished_new_version_post_mod_submit_and_accept(self, preprint_post_mod, creator, moderator): + assert preprint_post_mod.is_published is True + assert preprint_post_mod.machine_state == ReviewStates.ACCEPTED.value + new_version = PreprintFactory.create_version( + create_from=preprint_post_mod, + creator=creator, + final_machine_state='initial', + is_published=False, + set_doi=False + ) + assert new_version.is_published is False + assert new_version.machine_state == ReviewStates.INITIAL.value + guid_obj = new_version.get_guid() + assert guid_obj.object_id == preprint_post_mod.pk + assert guid_obj.referent == preprint_post_mod + assert guid_obj.content_type == ContentType.objects.get_for_model(Preprint) + + new_version.run_submit(creator) + assert new_version.is_published is True + assert new_version.machine_state == ReviewStates.PENDING.value + guid_obj = new_version.get_guid() + assert guid_obj.object_id == new_version.pk + assert guid_obj.referent == new_version + assert guid_obj.content_type == ContentType.objects.get_for_model(Preprint) + + new_version.run_accept(moderator, 'comment') + assert new_version.is_published is True + assert new_version.machine_state == ReviewStates.ACCEPTED.value + guid_obj = new_version.get_guid() + assert guid_obj.object_id == new_version.pk + assert guid_obj.referent == new_version + assert guid_obj.content_type == ContentType.objects.get_for_model(Preprint) + + def test_preprint_withdrawal_request_pre_mod(self, make_withdrawal_request, moderator, preprint_pre_mod): + assert preprint_pre_mod.is_published is True + assert preprint_pre_mod.machine_state == ReviewStates.ACCEPTED.value + withdrawal_request = make_withdrawal_request(preprint_pre_mod) + preprint_pre_mod.reload() + assert withdrawal_request.machine_state == DefaultStates.PENDING.value + assert preprint_pre_mod.is_published is True + assert preprint_pre_mod.machine_state == ReviewStates.ACCEPTED.value + withdrawal_request.run_accept(moderator, 'comment') + preprint_pre_mod.reload() + assert withdrawal_request.machine_state == DefaultStates.ACCEPTED.value + # In model, `is_published` remains True; this is different than API where `is_published` uses `NoneIfWithdrawal` + assert preprint_pre_mod.is_retracted is True + assert preprint_pre_mod.is_published is True + assert preprint_pre_mod.machine_state == ReviewStates.WITHDRAWN.value + + def test_preprint_withdrawal_request_post_mod(self, make_withdrawal_request, moderator, preprint_post_mod): + assert preprint_post_mod.is_published is True + assert preprint_post_mod.machine_state == ReviewStates.ACCEPTED.value + withdrawal_request = make_withdrawal_request(preprint_post_mod) + preprint_post_mod.reload() + assert withdrawal_request.machine_state == DefaultStates.PENDING.value + assert preprint_post_mod.is_published is True + assert preprint_post_mod.machine_state == ReviewStates.ACCEPTED.value + withdrawal_request.run_accept(moderator, 'comment') + preprint_post_mod.reload() + assert withdrawal_request.machine_state == DefaultStates.ACCEPTED.value + # In model, `is_published` remains True; this is different than API where `is_published` uses `NoneIfWithdrawal` + assert preprint_post_mod.is_retracted is True + assert preprint_post_mod.is_published is True + assert preprint_post_mod.machine_state == ReviewStates.WITHDRAWN.value + + def test_preprint_version_withdrawal_request_pre_mod(self, make_withdrawal_request, creator, moderator, preprint_pre_mod): + new_version = PreprintFactory.create_version( + create_from=preprint_pre_mod, + creator=creator, + final_machine_state='initial', + is_published=False, + set_doi=False + ) + new_version.run_submit(creator) + new_version.run_accept(moderator, 'comment') + new_version.reload() + assert new_version.is_published is True + assert new_version.machine_state == ReviewStates.ACCEPTED.value + withdrawal_request = make_withdrawal_request(new_version) + new_version.reload() + assert withdrawal_request.machine_state == DefaultStates.PENDING.value + assert new_version.is_published is True + assert new_version.machine_state == ReviewStates.ACCEPTED.value + withdrawal_request.run_accept(moderator, 'comment') + new_version.reload() + assert withdrawal_request.machine_state == DefaultStates.ACCEPTED.value + # In model, `is_published` remains True; this is different than API where `is_published` uses `NoneIfWithdrawal` + assert new_version.is_retracted is True + assert new_version.is_published is True + assert new_version.machine_state == ReviewStates.WITHDRAWN.value + + def test_preprint_version_withdrawal_request_post_mod(self, make_withdrawal_request, creator, moderator, preprint_post_mod): + new_version = PreprintFactory.create_version( + create_from=preprint_post_mod, + creator=creator, + final_machine_state='initial', + is_published=False, + set_doi=False + ) + new_version.run_submit(creator) + new_version.run_accept(moderator, 'comment') + new_version.reload() + assert new_version.is_published is True + assert new_version.machine_state == ReviewStates.ACCEPTED.value + withdrawal_request = make_withdrawal_request(new_version) + new_version.reload() + assert withdrawal_request.machine_state == DefaultStates.PENDING.value + assert new_version.is_published is True + assert new_version.machine_state == ReviewStates.ACCEPTED.value + withdrawal_request.run_accept(moderator, 'comment') + new_version.reload() + assert withdrawal_request.machine_state == DefaultStates.ACCEPTED.value + # In model, `is_published` remains True; this is different than API where `is_published` uses `NoneIfWithdrawal` + assert new_version.is_retracted is True + assert new_version.is_published is True + assert new_version.machine_state == ReviewStates.WITHDRAWN.value diff --git a/website/identifiers/clients/crossref.py b/website/identifiers/clients/crossref.py index 09e74f7ad8f..c80f5c06790 100644 --- a/website/identifiers/clients/crossref.py +++ b/website/identifiers/clients/crossref.py @@ -138,6 +138,23 @@ def build_posted_content(self, preprint, element, include_relation): ] posted_content.append(element.doi_data(*doi_data)) + preprint_versions = preprint.get_preprint_versions() + for preprint_version, previous_version in zip(preprint_versions, preprint_versions[1:]): + if preprint_version.version > preprint.version: + continue + doi_relations = element.doi_relations( + element.doi(self.build_doi(preprint_version)), + element.program( + element.related_item( + element.description('Updated version'), + element.intra_work_relation( + self.build_doi(previous_version), + **{'relationship-type': 'isVersionOf', 'identifier-type': 'doi'} + ) + ), xmlns=CROSSREF_RELATIONS + ) + ) + posted_content.append(doi_relations) return posted_content def _process_crossref_name(self, contributor): diff --git a/website/project/views/node.py b/website/project/views/node.py index c0e30b9630b..66460783d5a 100644 --- a/website/project/views/node.py +++ b/website/project/views/node.py @@ -789,7 +789,7 @@ def _view_project(node, auth, primary=False, 'doi': node.get_identifier_value('doi'), 'ark': node.get_identifier_value('ark'), }, - 'visible_preprints': serialize_preprints(node, user), + 'visible_preprints': serialize_preprints(node, user, latest_only=True), 'institutions': get_affiliated_institutions(node) if node else [], 'has_draft_registrations': node.has_active_draft_registrations, 'access_requests_enabled': node.access_requests_enabled, @@ -921,7 +921,10 @@ def serialize_collections(collection_submissions, auth): } for collection_submission in collection_submissions if collection_submission.collection.provider and (collection_submission.collection.is_public or (auth.user and auth.user.has_perm('read_collection', collection_submission.collection)))] -def serialize_preprints(node, user): +def serialize_preprints(node, user, latest_only=False): + preprints = Preprint.objects.can_view(base_queryset=node.preprints, user=user).filter(date_withdrawn__isnull=True) + if latest_only: + preprints = [preprint for preprint in preprints if preprint.is_latest_version] return [ { 'title': preprint.title, @@ -932,7 +935,7 @@ def serialize_preprints(node, user): 'provider': {'name': 'OSF Preprints' if preprint.provider.name == 'Open Science Framework' else preprint.provider.name, 'workflow': preprint.provider.reviews_workflow}, 'url': preprint.url, 'absolute_url': preprint.absolute_url - } for preprint in Preprint.objects.can_view(base_queryset=node.preprints, user=user).filter(date_withdrawn__isnull=True) + } for preprint in preprints ] diff --git a/website/templates/emails/digest.html.mako b/website/templates/emails/digest.html.mako index 84ac3ca98eb..719a86b0c3c 100644 --- a/website/templates/emails/digest.html.mako +++ b/website/templates/emails/digest.html.mako @@ -9,7 +9,7 @@

<% from osf.models import Guid %> - ${Guid.objects.get(_id=key).referent.title} + ${Guid.load(key).referent.title} %if parent : in ${Guid.objects.get(_id=parent).referent.title} %endif diff --git a/website/views.py b/website/views.py index 2d654a81946..0f0d1c9ad75 100644 --- a/website/views.py +++ b/website/views.py @@ -206,13 +206,10 @@ def forgot_password_form(): def resolve_guid_download(guid, provider=None): - try: - guid = Guid.objects.get(_id=guid.lower()) - except Guid.DoesNotExist: + resource, _ = Guid.load_referent(guid, ignore_not_found=True) + if not resource: raise HTTPError(http_status.HTTP_404_NOT_FOUND) - resource = guid.referent - suffix = request.view_args.get('suffix') if suffix and suffix.startswith('osfstorage/files/'): # legacy route filename = suffix.replace('osfstorage/files/', '').rstrip('/') @@ -293,15 +290,14 @@ def resolve_guid(guid, suffix=None): if 'revision' in request.args: return resolve_guid_download(guid) - # Retrieve guid data if present, error if missing - try: - resource = Guid.objects.get(_id=guid.lower()).referent - except Guid.DoesNotExist: - raise HTTPError(http_status.HTTP_404_NOT_FOUND) - + # Retrieve resource and version from a guid str + resource, version = Guid.load_referent(guid, ignore_not_found=True) if not resource or not resource.deep_url: raise HTTPError(http_status.HTTP_404_NOT_FOUND) + if version and guid != resource._id: + return redirect(f'/{resource._id}/{suffix}' if suffix else f'/{resource._id}/', code=302) + if isinstance(resource, DraftNode): raise HTTPError(http_status.HTTP_404_NOT_FOUND)