diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild index 8a47cddf79369..84d7341ea3b39 100644 --- a/drivers/gpu/arm/midgard/Kbuild +++ b/drivers/gpu/arm/midgard/Kbuild @@ -15,7 +15,7 @@ # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= "r13p0-00rel0" +MALI_RELEASE_NAME ?= "r9p0-05rel0" # Paths required for build KBASE_PATH = $(src) @@ -88,6 +88,7 @@ SRC := \ mali_kbase_context.c \ mali_kbase_pm.c \ mali_kbase_config.c \ + mali_kbase_instr.c \ mali_kbase_vinstr.c \ mali_kbase_softjobs.c \ mali_kbase_10969_workaround.c \ @@ -103,27 +104,25 @@ SRC := \ mali_kbase_replay.c \ mali_kbase_mem_profile_debugfs.c \ mali_kbase_mmu_mode_lpae.c \ - mali_kbase_mmu_mode_aarch64.c \ mali_kbase_disjoint_events.c \ mali_kbase_gator_api.c \ mali_kbase_debug_mem_view.c \ mali_kbase_debug_job_fault.c \ mali_kbase_smc.c \ mali_kbase_mem_pool.c \ - mali_kbase_mem_pool_debugfs.c \ - mali_kbase_tlstream.c \ - mali_kbase_strings.c \ - mali_kbase_as_fault_debugfs.c + mali_kbase_mem_pool_debugfs.c -ifeq ($(MALI_UNIT_TEST),1) - SRC += mali_kbase_tlstream_test.c +ifeq ($(CONFIG_MALI_MIPE_ENABLED),y) + SRC += mali_kbase_tlstream.c + ifeq ($(MALI_UNIT_TEST),1) + SRC += mali_kbase_tlstream_test.c + endif endif ifeq ($(MALI_CUSTOMER_RELEASE),0) SRC += mali_kbase_regs_dump_debugfs.c endif - # Job Scheduler Policy: Completely Fair Scheduler SRC += mali_kbase_js_policy_cfs.c @@ -201,7 +200,13 @@ obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o # Tell the Linux build system to enable building of our .c files mali_kbase-y := $(SRC:.c=.o) -mali_kbase-$(CONFIG_MALI_DMA_FENCE) += mali_kbase_dma_fence.o +ifneq ($(wildcard $(src)/internal/Kbuild),) +ifeq ($(MALI_CUSTOMER_RELEASE),0) +# This include may set MALI_BACKEND_PATH and CONFIG_MALI_BACKEND_REAL +include $(src)/internal/Kbuild +mali_kbase-y += $(INTERNAL:.c=.o) +endif +endif MALI_BACKEND_PATH ?= backend CONFIG_MALI_BACKEND ?= gpu diff --git a/drivers/gpu/arm/midgard/Kconfig b/drivers/gpu/arm/midgard/Kconfig index 201832b262d46..4f70e31d7b25f 100644 --- a/drivers/gpu/arm/midgard/Kconfig +++ b/drivers/gpu/arm/midgard/Kconfig @@ -16,7 +16,6 @@ menuconfig MALI_MIDGARD tristate "Mali Midgard series support" - select GPU_TRACEPOINTS if ANDROID default n help Enable this option to build support for a ARM Mali Midgard GPU. @@ -24,15 +23,38 @@ menuconfig MALI_MIDGARD To compile this driver as a module, choose M here: this will generate a single module, called mali_kbase. +choice + prompt "Streamline support" + depends on MALI_MIDGARD + default MALI_TIMELINE_DISABLED + help + Select the Streamline support configuration. + +config MALI_TIMELINE_DISABLED + bool "Streamline support disabled" + help + Disable support for the ARM Streamline Performance Analyzer. + + Timeline support will not be included in + the kernel code. + No debug stream will be generated. + config MALI_GATOR_SUPPORT bool "Streamline support via Gator" - depends on MALI_MIDGARD - default n help Adds diagnostic support for use with the ARM Streamline Performance Analyzer. You will need the Gator device driver already loaded before loading this driver when enabling Streamline debug support. - This is a legacy interface required by older versions of Streamline. + +config MALI_MIPE_ENABLED + bool "Streamline support via MIPE" + help + Adds diagnostic support for use with the ARM Streamline Performance Analyzer. + + The stream will be transmitted directly to the Mali GPU library. + A compatible version of the library is required to read the debug stream generated by the kernel.
+ +endchoice config MALI_MIDGARD_DVFS bool "Enable legacy DVFS" @@ -59,15 +81,6 @@ config MALI_DEVFREQ governor, the frequency of Mali will be dynamically selected from the available OPPs. -config MALI_DMA_FENCE - bool "DMA_BUF fence support for Mali" - depends on MALI_MIDGARD && !KDS - default n - help - Support DMA_BUF fences for Mali. - - This option should only be enabled if KDS is not present and - the Linux Kernel has built in support for DMA_BUF fences. # MALI_EXPERT configuration options @@ -79,18 +92,13 @@ menuconfig MALI_EXPERT Enabling this option and modifying the default settings may produce a driver with performance or other limitations. -config MALI_PRFCNT_SET_SECONDARY - bool "Use secondary set of performance counters" +config MALI_DEBUG_SHADER_SPLIT_FS + bool "Allow mapping of shader cores via sysfs" depends on MALI_MIDGARD && MALI_EXPERT default n help - Select this option to use secondary set of performance counters. Kernel - features that depend on an access to the primary set of counters may - become unavailable. Enabling this option will prevent power management - from working optimally and may cause instrumentation tools to return - bogus results. - - If unsure, say N. + Select this option to provide a sysfs entry for runtime configuration of shader + core affinity masks. config MALI_PLATFORM_FAKE bool "Enable fake platform device support" @@ -154,23 +162,6 @@ config MALI_DEBUG help Select this option for increased checking and reporting of errors. -config MALI_FENCE_DEBUG - bool "Debug sync fence usage" - depends on MALI_MIDGARD && MALI_EXPERT && SYNC - default y if MALI_DEBUG - help - Select this option to enable additional checking and reporting on the - use of sync fences in the Mali driver. - - This will add a 3s timeout to all sync fence waits in the Mali - driver, so that when work for Mali has been waiting on a sync fence - for a long time a debug message will be printed, detailing what fence - is causing the block, and which dependent Mali atoms are blocked as a - result of this. - - The timeout can be changed at runtime through the js_soft_timeout - device attribute, where the timeout is specified in milliseconds. - config MALI_NO_MALI bool "No Mali" depends on MALI_MIDGARD && MALI_EXPERT @@ -207,19 +198,11 @@ config MALI_SYSTEM_TRACE minimal overhead when not in use. Enable only if you know what you are doing. -config MALI_GPU_MMU_AARCH64 - bool "Use AArch64 page tables" - depends on MALI_MIDGARD && MALI_EXPERT - default n +config MALI_GPU_TRACEPOINTS + bool "Enable GPU tracepoints" + depends on MALI_MIDGARD && ANDROID + select GPU_TRACEPOINTS help - Use AArch64 format page tables for the GPU instead of LPAE-style. - The two formats have the same functionality and performance but a - future GPU may deprecate or remove the legacy LPAE-style format. - - The LPAE-style format is supported on all Midgard and current Bifrost - GPUs. Enabling AArch64 format restricts the driver to only supporting - Bifrost GPUs. - - If in doubt, say N. + Enables GPU tracepoints using Android trace event definitions. source "drivers/gpu/arm/midgard/platform/Kconfig" diff --git a/drivers/gpu/arm/midgard/Makefile b/drivers/gpu/arm/midgard/Makefile index e1625e6dba79c..d4d5de4cd5121 100644 --- a/drivers/gpu/arm/midgard/Makefile +++ b/drivers/gpu/arm/midgard/Makefile @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -26,6 +26,10 @@ ifeq ($(MALI_UNIT_TEST), 1) EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers endif +ifneq ($(wildcard $(CURDIR)/internal/Makefile.in),) +include $(CURDIR)/internal/Makefile.in +endif + ifeq ($(MALI_BUS_LOG), 1) #Add bus logger symbols EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c index c6862539c8ddd..92a14fa1bae12 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -16,12 +16,7 @@ #include "backend/gpu/mali_kbase_cache_policy_backend.h" +#include #include -void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, - u32 mode) -{ - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) - kbase_reg_write(kbdev, COHERENCY_ENABLE, mode, NULL); -} diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h index fe9869109a825..42069fc88a1ff 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,13 +22,5 @@ #include "mali_kbase.h" #include "mali_base_kernel.h" -/** - * kbase_cache_set_coherency_mode() - Sets the system coherency mode - * in the GPU. - * @kbdev: Device pointer - * @mode: Coherency mode. COHERENCY_ACE/ACE_LITE - */ -void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, - u32 mode); #endif /* _KBASE_CACHE_POLICY_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c index ad05fe5bea8d2..86227d9962570 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -135,14 +135,6 @@ kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) stat->private_data = NULL; -#ifdef CONFIG_DEVFREQ_THERMAL -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) - if (kbdev->devfreq_cooling) - memcpy(&kbdev->devfreq_cooling->last_status, stat, - sizeof(*stat)); -#endif -#endif - return 0; } @@ -213,8 +205,7 @@ int kbase_devfreq_init(struct kbase_device *kbdev) dp = &kbdev->devfreq_profile; dp->initial_freq = kbdev->current_freq; - /* .KP : set devfreq_dvfs_interval_in_ms */ - dp->polling_ms = 20; + dp->polling_ms = 100; dp->target = kbase_devfreq_target; dp->get_dev_status = kbase_devfreq_status; dp->get_cur_freq = kbase_devfreq_cur_freq; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c index b9238a305177c..83d5ec9f7a93e 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,9 +33,7 @@ void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); KBASE_DEBUG_ASSERT(kbdev->dev != NULL); dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value); - writel(value, kbdev->reg + offset); - if (kctx && kctx->jctx.tb) kbase_device_trace_register_access(kctx, REG_WRITE, offset, value); @@ -50,9 +48,7 @@ u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); KBASE_DEBUG_ASSERT(kbdev->dev != NULL); - val = readl(kbdev->reg + offset); - dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val); if (kctx && kctx->jctx.tb) kbase_device_trace_register_access(kctx, REG_READ, offset, val); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c index d578fd78e8259..72a98d0f79525 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,6 +20,7 @@ * Register-based HW access backend APIs */ #include +#include #include #include #include @@ -80,6 +81,7 @@ int kbase_backend_late_init(struct kbase_device *kbdev) if (err) goto fail_timer; +/* Currently disabled on the prototype */ #ifdef CONFIG_MALI_DEBUG #ifndef CONFIG_MALI_NO_MALI if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { @@ -99,13 +101,12 @@ int kbase_backend_late_init(struct kbase_device *kbdev) return 0; fail_job_slot: - +/* Currently disabled on the prototype */ #ifdef CONFIG_MALI_DEBUG #ifndef CONFIG_MALI_NO_MALI fail_interrupt_test: #endif /* !CONFIG_MALI_NO_MALI */ #endif /* CONFIG_MALI_DEBUG */ - kbase_backend_timer_term(kbdev); fail_timer: kbase_hwaccess_pm_halt(kbdev); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c index d410cd297889c..705b1ebfa87fb 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -86,20 +86,8 @@ void kbase_backend_gpuprops_get(struct kbase_device *kbdev, void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev, struct kbase_gpuprops_regdump *regdump) { - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_COHERENCY_REG)) { - /* Ensure we can access the GPU registers */ - kbase_pm_register_access_enable(kbdev); - - regdump->coherency_features = kbase_reg_read(kbdev, - GPU_CONTROL_REG(COHERENCY_FEATURES), NULL); - - /* We're done accessing the GPU registers for now. */ - kbase_pm_register_access_disable(kbdev); - } else { - /* Pre COHERENCY_FEATURES we only supported ACE_LITE */ regdump->coherency_features = COHERENCY_FEATURE_BIT(COHERENCY_NONE) | COHERENCY_FEATURE_BIT(COHERENCY_ACE_LITE); - } } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c index 3f06a10f7fed4..2c987071a77ca 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,7 +23,6 @@ #include #include -#include #include #include #include @@ -41,6 +40,14 @@ static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev) u32 irq_mask; spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + /* Wait for any reset to complete */ + while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.backend.cache_clean_wait, + kbdev->hwcnt.backend.state != + KBASE_INSTR_STATE_RESETTING); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_REQUEST_CLEAN); @@ -67,14 +74,18 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, { unsigned long flags, pm_flags; int err = -EINVAL; + struct kbasep_js_device_data *js_devdata; u32 irq_mask; int ret; u64 shader_cores_needed; - u32 prfcnt_config; + + KBASE_DEBUG_ASSERT(NULL == kbdev->hwcnt.suspended_kctx); shader_cores_needed = kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER); + js_devdata = &kbdev->js_data; + /* alignment failure */ if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1))) goto out_err; @@ -89,6 +100,14 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { + /* GPU is being reset */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } + if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { /* Instrumentation is already enabled */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); @@ -106,6 +125,10 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, kbdev->hwcnt.kctx = kctx; /* Remember the dump address so we can reprogram it later */ kbdev->hwcnt.addr = setup->dump_buffer; + /* Remember all the settings for suspend/resume */ + if (&kbdev->hwcnt.suspended_state != setup) + memcpy(&kbdev->hwcnt.suspended_state, setup, + sizeof(kbdev->hwcnt.suspended_state)); /* Request the clean */ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; @@ -128,22 +151,9 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, kbase_pm_request_l2_caches(kbdev); /* Configure */ - prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT; -#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY - { - u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) - >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; - int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id); - - if (arch_v6) - prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT; - } -#endif - kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - prfcnt_config | PRFCNT_CONFIG_MODE_OFF, kctx); - + (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) + | PRFCNT_CONFIG_MODE_OFF, kctx); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), setup->dump_buffer & 0xFFFFFFFF, kctx); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), @@ -164,7 +174,8 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, setup->tiler_bm, kctx); kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), - prfcnt_config | PRFCNT_CONFIG_MODE_MANUAL, kctx); + (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | + PRFCNT_CONFIG_MODE_MANUAL, kctx); /* If HW has 
PRLAM-8186 we can now re-enable the tiler HW counters dump */ @@ -174,6 +185,14 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { + /* GPU is being reset */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; kbdev->hwcnt.backend.triggered = 1; wake_up(&kbdev->hwcnt.backend.wait); @@ -340,11 +359,15 @@ void kbasep_cache_clean_worker(struct work_struct *data) spin_lock_irqsave(&kbdev->hwcnt.lock, flags); /* Wait for our condition, and any reset to complete */ - while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) { + while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING || + kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_CLEANING) { spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); wait_event(kbdev->hwcnt.backend.cache_clean_wait, + (kbdev->hwcnt.backend.state != + KBASE_INSTR_STATE_RESETTING && kbdev->hwcnt.backend.state != - KBASE_INSTR_STATE_CLEANING); + KBASE_INSTR_STATE_CLEANING)); spin_lock_irqsave(&kbdev->hwcnt.lock, flags); } KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == @@ -377,6 +400,9 @@ void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) &kbdev->hwcnt.backend.cache_clean_work); KBASE_DEBUG_ASSERT(ret); } + /* NOTE: In the state KBASE_INSTR_STATE_RESETTING, We're in a reset, + * and the instrumentation state hasn't been restored yet - + * kbasep_reset_timeout_worker() will do the rest of the work */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); } @@ -404,6 +430,10 @@ void kbase_clean_caches_done(struct kbase_device *kbdev) kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED; wake_up(&kbdev->hwcnt.backend.cache_clean_wait); } + /* NOTE: In the state KBASE_INSTR_STATE_RESETTING, We're in a + * reset, and the instrumentation state hasn't been restored yet + * - kbasep_reset_timeout_worker() will do the rest of the work + */ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); } @@ -421,6 +451,14 @@ int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { + /* GPU is being reset */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { err = -EINVAL; kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; @@ -444,6 +482,14 @@ int kbase_instr_hwcnt_clear(struct kbase_context *kctx) spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { + /* GPU is being reset */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } + /* Check it's the context previously set up and we're not already * dumping */ if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h index 4794672da8f09..23bd80a5a1507 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h +++ 
b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,6 +39,10 @@ enum kbase_instr_state { /* Cache clean completed, and either a) a dump is complete, or * b) instrumentation can now be setup. */ KBASE_INSTR_STATE_CLEANED, + /* kbasep_reset_timeout_worker() has started (but not completed) a + * reset. This generally indicates the current action should be aborted, + * and kbasep_reset_timeout_worker() will handle the cleanup */ + KBASE_INSTR_STATE_RESETTING, /* An error has occured during DUMPING (page fault). */ KBASE_INSTR_STATE_FAULT }; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c index b891b12a32993..49c72f90aac6f 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,6 +28,7 @@ #define MMU_IRQ_TAG 1 #define GPU_IRQ_TAG 2 + static void *kbase_tag(void *ptr, u32 tag) { return (void *)(((uintptr_t) ptr) | tag); @@ -38,6 +39,9 @@ static void *kbase_untag(void *ptr) { return (void *)(((uintptr_t) ptr) & ~3); } + + + static irqreturn_t kbase_job_irq_handler(int irq, void *data) { unsigned long flags; @@ -147,13 +151,13 @@ static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) return IRQ_HANDLED; } - static irq_handler_t kbase_handler_table[] = { [JOB_IRQ_TAG] = kbase_job_irq_handler, [MMU_IRQ_TAG] = kbase_mmu_irq_handler, [GPU_IRQ_TAG] = kbase_gpu_irq_handler, }; + #ifdef CONFIG_MALI_DEBUG #define JOB_IRQ_HANDLER JOB_IRQ_TAG #define MMU_IRQ_HANDLER MMU_IRQ_TAG diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h index 83d477898c5e6..8ccc440171a2b 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -71,7 +71,6 @@ struct slot_rb { * @reset_work: Work item for performing the reset * @reset_wait: Wait event signalled when the reset is complete * @reset_timer: Timeout for soft-stops before the reset - * @timeouts_updated: Have timeout values just been updated? * * The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when * accessing this structure @@ -98,15 +97,11 @@ struct kbase_backend_data { /* The GPU reset process is currently occuring (timeout has expired or * kbasep_try_reset_gpu_early was called) */ #define KBASE_RESET_GPU_HAPPENING 3 -/* Reset the GPU silently, used when resetting the GPU as part of normal - * behavior (e.g. when exiting protected mode).
*/ -#define KBASE_RESET_GPU_SILENT 4 + struct workqueue_struct *reset_workq; struct work_struct reset_work; wait_queue_head_t reset_wait; struct hrtimer reset_timer; - - bool timeouts_updated; }; /** diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c index 00900a99a898f..33d6aef0ec725 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -15,6 +15,8 @@ + + /* * Base kernel job manager APIs */ @@ -25,9 +27,11 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #include #endif +#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#include +#endif #include +#include #include #include #include @@ -83,31 +87,14 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, * start */ cfg = kctx->as_nr; - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) - cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; - #ifndef CONFIG_MALI_COH_GPU - if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_START)) - cfg |= JS_CONFIG_START_FLUSH_NO_ACTION; - else - cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; - - if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END)) - cfg |= JS_CONFIG_END_FLUSH_NO_ACTION; - else - cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; -#endif /* CONFIG_MALI_COH_GPU */ - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10649) || - !kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3982)) - cfg |= JS_CONFIG_START_MMU; + cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; + cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; +#endif + cfg |= JS_CONFIG_START_MMU; cfg |= JS_CONFIG_THREAD_PRI(8); - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE) && - (katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED)) - cfg |= JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK; - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) { @@ -124,9 +111,6 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx); - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) - kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_FLUSH_ID_NEXT), - katom->flush_id, kctx); /* Write an approximate start timestamp. * It's approximate because there might be a job in the HEAD register. @@ -146,6 +130,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js), kctx, kbase_jd_atom_id(kctx, katom)); #endif +#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_attrib_atom_config(katom, jc_head, katom->affinity, cfg); kbase_tlstream_tl_ret_ctx_lpu( @@ -157,6 +142,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, katom, &kbdev->gpu_props.props.raw_props.js_features[js], "ctx_nr,atom_nr"); +#endif #ifdef CONFIG_GPU_TRACEPOINTS if (!kbase_backend_nr_atoms_submitted(kbdev, js)) { /* If this is the only job on the slot, trace it as starting */ @@ -218,24 +204,6 @@ static void kbasep_job_slot_update_head_start_timestamp( } } -/** - * kbasep_trace_tl_nret_atom_lpu - Call nret_atom_lpu timeline tracepoint - * @kbdev: kbase device - * @i: job slot - * - * Get kbase atom by calling kbase_gpu_inspect for given job slot. 
- * Then use obtained katom and name of slot associated with the given - * job slot number in tracepoint call to the instrumentation module - * informing that given atom is no longer executed on given lpu (job slot). - */ -static void kbasep_trace_tl_nret_atom_lpu(struct kbase_device *kbdev, int i) -{ - struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, i, 0); - - kbase_tlstream_tl_nret_atom_lpu(katom, - &kbdev->gpu_props.props.raw_props.js_features[i]); -} - void kbase_job_done(struct kbase_device *kbdev, u32 done) { unsigned long flags; @@ -296,12 +264,9 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) GATOR_JOB_SLOT_SOFT_STOPPED, i), NULL, 0); #endif - +#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_aux_job_softstop(i); - - kbasep_trace_tl_nret_atom_lpu( - kbdev, i); - +#endif /* Soft-stopped job - read the value of * JS_TAIL so that the job chain can * be resumed */ @@ -472,21 +437,19 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) KBASE_EXPORT_TEST_API(kbase_job_done); static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev, - struct kbase_jd_atom *katom) + u16 core_reqs) { bool soft_stops_allowed = true; - if (kbase_jd_katom_is_protected(katom)) { - soft_stops_allowed = false; - } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) { - if ((katom->core_req & BASE_JD_REQ_T) != 0) + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) { + if ((core_reqs & BASE_JD_REQ_T) != 0) soft_stops_allowed = false; } return soft_stops_allowed; } static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev, - base_jd_core_req core_reqs) + u16 core_reqs) { bool hard_stops_allowed = true; @@ -500,7 +463,7 @@ static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev, void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, int js, u32 action, - base_jd_core_req core_reqs, + u16 core_reqs, struct kbase_jd_atom *target_katom) { struct kbase_context *kctx = target_katom->kctx; @@ -523,13 +486,12 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, if (action == JS_COMMAND_SOFT_STOP) { bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev, - target_katom); + core_reqs); if (!soft_stop_allowed) { #ifdef CONFIG_MALI_DEBUG - dev_dbg(kbdev->dev, - "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X", - (unsigned int)core_reqs); + dev_dbg(kbdev->dev, "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X", + (unsigned int)core_reqs); #endif /* CONFIG_MALI_DEBUG */ return; } @@ -537,51 +499,9 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, /* We are about to issue a soft stop, so mark the atom as having * been soft stopped */ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED; + } - /* Mark the point where we issue the soft-stop command */ - kbase_tlstream_aux_issue_job_softstop(target_katom); - - if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) { - int i; - - for (i = 0; - i < kbase_backend_nr_atoms_submitted(kbdev, js); - i++) { - struct kbase_jd_atom *katom; - - katom = kbase_gpu_inspect(kbdev, js, i); - - KBASE_DEBUG_ASSERT(katom); - - /* For HW_ISSUE_8316, only 'bad' jobs attacking - * the system can cause this issue: normally, - * all memory should be allocated in multiples - * of 4 pages, and growable memory should be - * changed size in multiples of 4 pages. 
- * - * Whilst such 'bad' jobs can be cleared by a - * GPU reset, the locking up of a uTLB entry - * caused by the bad job could also stall other - * ASs, meaning that other ASs' jobs don't - * complete in the 'grace' period before the - * reset. We don't want to lose other ASs' jobs - * when they would normally complete fine, so we - * must 'poke' the MMU regularly to help other - * ASs complete */ - kbase_as_poking_timer_retain_atom( - kbdev, katom->kctx, katom); - } - } - - if (kbase_hw_has_feature( - kbdev, - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { - action = (target_katom->atom_flags & - KBASE_KATOM_FLAGS_JOBCHAIN) ? - JS_COMMAND_SOFT_STOP_1 : - JS_COMMAND_SOFT_STOP_0; - } - } else if (action == JS_COMMAND_HARD_STOP) { + if (action == JS_COMMAND_HARD_STOP) { bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev, core_reqs); @@ -605,21 +525,55 @@ void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, * hard-stop fails, so it is safe to just return and * ignore the hard-stop request. */ - dev_warn(kbdev->dev, - "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X", - (unsigned int)core_reqs); + dev_warn(kbdev->dev, "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X", + (unsigned int)core_reqs); return; } target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED; + } + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316) && + action == JS_COMMAND_SOFT_STOP) { + int i; - if (kbase_hw_has_feature( - kbdev, + for (i = 0; i < kbase_backend_nr_atoms_submitted(kbdev, js); + i++) { + struct kbase_jd_atom *katom; + + katom = kbase_gpu_inspect(kbdev, js, i); + + KBASE_DEBUG_ASSERT(katom); + + /* For HW_ISSUE_8316, only 'bad' jobs attacking the + * system can cause this issue: normally, all memory + * should be allocated in multiples of 4 pages, and + * growable memory should be changed size in multiples + * of 4 pages. + * + * Whilst such 'bad' jobs can be cleared by a GPU reset, + * the locking up of a uTLB entry caused by the bad job + * could also stall other ASs, meaning that other ASs' + * jobs don't complete in the 'grace' period before the + * reset. We don't want to lose other ASs' jobs when + * they would normally complete fine, so we must 'poke' + * the MMU regularly to help other ASs complete */ + kbase_as_poking_timer_retain_atom(kbdev, katom->kctx, + katom); + } + } + + if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + if (action == JS_COMMAND_SOFT_STOP) action = (target_katom->atom_flags & - KBASE_KATOM_FLAGS_JOBCHAIN) ? - JS_COMMAND_HARD_STOP_1 : - JS_COMMAND_HARD_STOP_0; - } + KBASE_KATOM_FLAGS_JOBCHAIN) ? + JS_COMMAND_SOFT_STOP_1 : + JS_COMMAND_SOFT_STOP_0; + else + action = (target_katom->atom_flags & + KBASE_KATOM_FLAGS_JOBCHAIN) ? 
+ JS_COMMAND_HARD_STOP_1 : + JS_COMMAND_HARD_STOP_0; } kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx); @@ -745,6 +699,7 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, struct kbase_jd_atom *target_katom) { struct kbase_device *kbdev; + struct kbasep_js_device_data *js_devdata; int js = target_katom->slot_nr; int priority = target_katom->sched_priority; int i; @@ -752,6 +707,7 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, KBASE_DEBUG_ASSERT(kctx != NULL); kbdev = kctx->kbdev; KBASE_DEBUG_ASSERT(kbdev != NULL); + js_devdata = &kbdev->js_data; lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); @@ -869,13 +825,6 @@ u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev) { u32 flush_id = 0; - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_FLUSH_REDUCTION)) { - mutex_lock(&kbdev->pm.lock); - if (kbdev->pm.backend.gpu_powered) - flush_id = kbase_reg_read(kbdev, - GPU_CONTROL_REG(LATEST_FLUSH), NULL); - mutex_unlock(&kbdev->pm.lock); - } return flush_id; } @@ -1083,7 +1032,7 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, * state when the soft/hard-stop action is complete */ void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, - base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom) + u16 core_reqs, struct kbase_jd_atom *target_katom) { u32 hw_action = action & JS_COMMAND_MASK; @@ -1095,7 +1044,7 @@ void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, /* For soft-stop, don't enter if soft-stop not allowed, or isn't * causing disjoint */ if (hw_action == JS_COMMAND_SOFT_STOP && - !(kbasep_soft_stop_allowed(kbdev, target_katom) && + !(kbasep_soft_stop_allowed(kbdev, core_reqs) && (action & JS_COMMAND_SW_CAUSES_DISJOINT))) return; @@ -1160,6 +1109,26 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev) kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL)); } +static void kbasep_save_hwcnt_setup(struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbase_uk_hwcnt_setup *hwcnt_setup) +{ + hwcnt_setup->dump_buffer = + kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), kctx) & + 0xffffffff; + hwcnt_setup->dump_buffer |= (u64) + kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), kctx) << + 32; + hwcnt_setup->jm_bm = + kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), kctx); + hwcnt_setup->shader_bm = + kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), kctx); + hwcnt_setup->tiler_bm = + kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), kctx); + hwcnt_setup->mmu_l2_bm = + kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), kctx); +} + static void kbasep_reset_timeout_worker(struct work_struct *data) { unsigned long flags, mmu_flags; @@ -1167,8 +1136,10 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) int i; ktime_t end_timestamp = ktime_get(); struct kbasep_js_device_data *js_devdata; + struct kbase_uk_hwcnt_setup hwcnt_setup = { {0} }; + enum kbase_instr_state bckp_state; bool try_schedule = false; - bool silent = false; + bool restore_hwc = false; KBASE_DEBUG_ASSERT(data); @@ -1178,16 +1149,8 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) KBASE_DEBUG_ASSERT(kbdev); js_devdata = &kbdev->js_data; - if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == - KBASE_RESET_GPU_SILENT) - silent = true; - KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0); - /* Suspend vinstr. - * This call will block until vinstr is suspended. 
*/ - kbase_vinstr_suspend(kbdev->vinstr_ctx); - /* Make sure the timer has completed - this cannot be done from * interrupt context, so this cannot be done within * kbasep_try_reset_gpu_early. */ @@ -1237,14 +1200,39 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) * assume that anything that is still left on the GPU is stuck there and * we'll kill it when we reset the GPU */ - if (!silent) - dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", + dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)", RESET_TIMEOUT); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { + /* the same interrupt handler preempted itself */ + /* GPU is being reset */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } + /* Save the HW counters setup */ + if (kbdev->hwcnt.kctx != NULL) { + struct kbase_context *kctx = kbdev->hwcnt.kctx; + + if (kctx->jctx.sched_info.ctx.is_scheduled) { + kbasep_save_hwcnt_setup(kbdev, kctx, &hwcnt_setup); + + restore_hwc = true; + } + } + /* Output the state of some interesting registers to help in the * debugging of GPU resets */ - if (!silent) - kbase_debug_dump_registers(kbdev); + kbase_debug_dump_registers(kbdev); + + bckp_state = kbdev->hwcnt.backend.state; + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_RESETTING; + kbdev->hwcnt.backend.triggered = 0; + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); /* Reset the GPU */ kbase_pm_init_hw(kbdev, 0); @@ -1284,14 +1272,101 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) kbase_disjoint_state_down(kbdev); wake_up(&kbdev->hwaccess.backend.reset_wait); - if (!silent) - dev_err(kbdev->dev, "Reset complete"); + dev_err(kbdev->dev, "Reset complete"); if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending) try_schedule = true; mutex_unlock(&js_devdata->runpool_mutex); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + /* Restore the HW counters setup */ + if (restore_hwc) { + struct kbase_context *kctx = kbdev->hwcnt.kctx; + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), + (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | + PRFCNT_CONFIG_MODE_OFF, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), + hwcnt_setup.dump_buffer & 0xFFFFFFFF, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), + hwcnt_setup.dump_buffer >> 32, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), + hwcnt_setup.jm_bm, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), + hwcnt_setup.shader_bm, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), + hwcnt_setup.mmu_l2_bm, kctx); + + /* Due to PRLAM-8186 we need to disable the Tiler before we + * enable the HW counter dump. 
*/ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), + 0, kctx); + else + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), + hwcnt_setup.tiler_bm, kctx); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), + (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | + PRFCNT_CONFIG_MODE_MANUAL, kctx); + + /* If HW has PRLAM-8186 we can now re-enable the tiler HW + * counters dump */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), + hwcnt_setup.tiler_bm, kctx); + } + kbdev->hwcnt.backend.state = bckp_state; + switch (kbdev->hwcnt.backend.state) { + /* Cases for waking kbasep_cache_clean_worker worker */ + case KBASE_INSTR_STATE_CLEANED: + /* Cache-clean IRQ occurred, but we reset: + * Wake up in case the waiter saw RESETTING */ + case KBASE_INSTR_STATE_REQUEST_CLEAN: + /* After a clean was requested, but before the regs were + * written: + * Wake up in case the waiter saw RESETTING */ + wake_up(&kbdev->hwcnt.backend.cache_clean_wait); + break; + case KBASE_INSTR_STATE_CLEANING: + /* Either: + * 1) We've not got the Cache-clean IRQ yet: it was lost, or: + * 2) We got it whilst resetting: it was voluntarily lost + * + * So, move to the next state and wake up: */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED; + wake_up(&kbdev->hwcnt.backend.cache_clean_wait); + break; + + /* Cases for waking anyone else */ + case KBASE_INSTR_STATE_DUMPING: + /* If dumping, abort the dump, because we may have lost the IRQ */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + break; + case KBASE_INSTR_STATE_DISABLED: + case KBASE_INSTR_STATE_IDLE: + case KBASE_INSTR_STATE_FAULT: + /* Every other reason: wake up in that state */ + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + break; + + /* Unhandled cases */ + case KBASE_INSTR_STATE_RESETTING: + default: + BUG(); + break; + } + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* Resume the vinstr core */ + kbase_vinstr_hwc_resume(kbdev->vinstr_ctx); + + /* Note: counter dumping may now resume */ + mutex_lock(&kbdev->pm.lock); /* Find out what cores are required now */ @@ -1311,10 +1386,6 @@ static void kbasep_reset_timeout_worker(struct work_struct *data) } kbase_pm_context_idle(kbdev); - - /* Release vinstr */ - kbase_vinstr_resume(kbdev->vinstr_ctx); - KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0); } @@ -1396,7 +1467,7 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) * * Return: * The function returns a boolean which should be interpreted as follows: - * true - Prepared for reset, kbase_reset_gpu_locked should be called. + * true - Prepared for reset, kbase_reset_gpu should be called. * false - Another thread is performing a reset, kbase_reset_gpu should * not be called.
*/ @@ -1490,29 +1561,4 @@ void kbase_reset_gpu_locked(struct kbase_device *kbdev) /* Try resetting early */ kbasep_try_reset_gpu_early_locked(kbdev); } - -void kbase_reset_gpu_silent(struct kbase_device *kbdev) -{ - if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, - KBASE_RESET_GPU_NOT_PENDING, - KBASE_RESET_GPU_SILENT) != - KBASE_RESET_GPU_NOT_PENDING) { - /* Some other thread is already resetting the GPU */ - return; - } - - kbase_disjoint_state_up(kbdev); - - queue_work(kbdev->hwaccess.backend.reset_workq, - &kbdev->hwaccess.backend.reset_work); -} - -bool kbase_reset_gpu_active(struct kbase_device *kbdev) -{ - if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == - KBASE_RESET_GPU_NOT_PENDING) - return false; - - return true; -} #endif /* KBASE_GPU_RESET_EN */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h index 8f1e5615ea436..eb068d40283b2 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -96,7 +96,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev, void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, int js, u32 action, - base_jd_core_req core_reqs, + u16 core_reqs, struct kbase_jd_atom *target_katom); /** diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c index da7c4df7d277c..c0168c74f8154 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,11 +24,11 @@ #include #include #include -#include #include #include #include #include +#include #include /* Return whether the specified ringbuffer is empty. 
HW access lock must be @@ -592,7 +592,7 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, case KBASE_ATOM_GPU_RB_READY: /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY: + case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE: /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: @@ -603,9 +603,6 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: break; - case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT: - /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ - case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ @@ -657,145 +654,53 @@ static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js) return true; } -static inline bool kbase_gpu_in_protected_mode(struct kbase_device *kbdev) +static bool kbase_gpu_in_secure_mode(struct kbase_device *kbdev) { - return kbdev->protected_mode; + return kbdev->secure_mode; } -static int kbase_gpu_protected_mode_enter(struct kbase_device *kbdev) +static int kbase_gpu_secure_mode_enable(struct kbase_device *kbdev) { int err = -EINVAL; lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - WARN_ONCE(!kbdev->protected_ops, - "Cannot enter protected mode: protected callbacks not specified.\n"); + WARN_ONCE(!kbdev->secure_ops, + "Cannot enable secure mode: secure callbacks not specified.\n"); - if (kbdev->protected_ops) { - /* Switch GPU to protected mode */ - err = kbdev->protected_ops->protected_mode_enter(kbdev); + if (kbdev->secure_ops) { + /* Switch GPU to secure mode */ + err = kbdev->secure_ops->secure_mode_enable(kbdev); if (err) - dev_warn(kbdev->dev, "Failed to enable protected mode: %d\n", - err); + dev_warn(kbdev->dev, "Failed to enable secure mode: %d\n", err); else - kbdev->protected_mode = true; + kbdev->secure_mode = true; } return err; } -static int kbase_gpu_protected_mode_reset(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - - WARN_ONCE(!kbdev->protected_ops, - "Cannot exit protected mode: protected callbacks not specified.\n"); - - if (!kbdev->protected_ops) - return -EINVAL; - - kbdev->protected_mode_transition = true; - kbase_reset_gpu_silent(kbdev); - - return 0; -} - -static int kbase_jm_exit_protected_mode(struct kbase_device *kbdev, - struct kbase_jd_atom **katom, int idx, int js) +static int kbase_gpu_secure_mode_disable(struct kbase_device *kbdev) { - int err = 0; - - switch (katom[idx]->exit_protected_state) { - case KBASE_ATOM_EXIT_PROTECTED_CHECK: - /* - * If the atom ahead of this one hasn't got to being - * submitted yet then bail. - */ - if (idx == 1 && - (katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED && - katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) - return -EAGAIN; - - /* If we're not exiting protected mode then we're done here. */ - if (!(kbase_gpu_in_protected_mode(kbdev) && - !kbase_jd_katom_is_protected(katom[idx]))) - return 0; - - /* - * If there is a transition in progress, or work still - * on the GPU try again later. - */ - if (kbdev->protected_mode_transition || - kbase_gpu_atoms_submitted_any(kbdev)) - return -EAGAIN; - - /* - * Exiting protected mode requires a reset, but first the L2 - * needs to be powered down to ensure it's not active when the - * reset is issued. 
- */ - katom[idx]->exit_protected_state = - KBASE_ATOM_EXIT_PROTECTED_IDLE_L2; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2: - if (kbase_pm_get_active_cores(kbdev, KBASE_PM_CORE_L2) || - kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) { - /* - * The L2 is still powered, wait for all the users to - * finish with it before doing the actual reset. - */ - return -EAGAIN; - } - katom[idx]->exit_protected_state = - KBASE_ATOM_EXIT_PROTECTED_RESET; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_EXIT_PROTECTED_RESET: - /* Issue the reset to the GPU */ - err = kbase_gpu_protected_mode_reset(kbdev); - if (err) { - /* Failed to exit protected mode, fail atom */ - katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; - kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); - /* Only return if head atom or previous atom - * already removed - as atoms must be returned - * in order */ - if (idx == 0 || katom[0]->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { - kbase_gpu_dequeue_atom(kbdev, js, NULL); - kbase_jm_return_atom_to_js(kbdev, katom[idx]); - } - - kbase_vinstr_resume(kbdev->vinstr_ctx); - - return -EINVAL; - } + int err = -EINVAL; - katom[idx]->exit_protected_state = - KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT; + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + WARN_ONCE(!kbdev->secure_ops, + "Cannot disable secure mode: secure callbacks not specified.\n"); - case KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT: - if (kbase_reset_gpu_active(kbdev)) - return -EAGAIN; + if (kbdev->secure_ops) { + /* Switch GPU to non-secure mode */ + err = kbdev->secure_ops->secure_mode_disable(kbdev); - /* protected mode sanity checks */ - KBASE_DEBUG_ASSERT_MSG( - kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev), - "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", - kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev)); - KBASE_DEBUG_ASSERT_MSG( - (kbase_jd_katom_is_protected(katom[idx]) && js == 0) || - !kbase_jd_katom_is_protected(katom[idx]), - "Protected atom on JS%d not supported", js); + if (err) + dev_warn(kbdev->dev, "Failed to disable secure mode: %d\n", err); + else + kbdev->secure_mode = false; } - return 0; + return err; } void kbase_gpu_slot_update(struct kbase_device *kbdev) @@ -814,7 +719,6 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) for (idx = 0; idx < SLOT_RB_SIZE; idx++) { bool cores_ready; - int ret; if (!katom[idx]) continue; @@ -831,48 +735,10 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) break; katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT; + KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - - case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT: - /* - * Exiting protected mode must be done before - * the references on the cores are taken as - * a power down the L2 is required which - * can't happen after the references for this - * atom are taken. - */ - ret = kbase_jm_exit_protected_mode(kbdev, - katom, idx, js); - if (ret) - break; - - katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; - - /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: - if (katom[idx]->will_fail_event_code) { - kbase_gpu_mark_atom_for_return(kbdev, - katom[idx]); - /* Set EVENT_DONE so this atom will be - completed, not unpulled. 
*/ - katom[idx]->event_code = - BASE_JD_EVENT_DONE; - /* Only return if head atom or previous - * atom already removed - as atoms must - * be returned in order. */ - if (idx == 0 || katom[0]->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { - kbase_gpu_dequeue_atom(kbdev, js, NULL); - kbase_jm_return_atom_to_js(kbdev, katom[idx]); - } - break; - } - - cores_ready = kbasep_js_job_check_ref_cores(kbdev, js, katom[idx]); @@ -899,28 +765,12 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) break; katom[idx]->gpu_rb_state = - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY; + KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ - case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY: - - /* Only submit if head atom or previous atom - * already submitted */ - if (idx == 1 && - (katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED && - katom[0]->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) - break; - - /* - * If the GPU is transitioning protected mode - * then bail now and we'll be called when the - * new state has settled. - */ - if (kbdev->protected_mode_transition) - break; - - if (!kbase_gpu_in_protected_mode(kbdev) && kbase_jd_katom_is_protected(katom[idx])) { + case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE: + if (kbase_gpu_in_secure_mode(kbdev) != kbase_jd_katom_is_secure(katom[idx])) { int err = 0; /* Not in correct mode, take action */ @@ -934,26 +784,16 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) */ break; } - if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) { - /* - * We can't switch now because - * the vinstr core state switch - * is not done yet. - */ - break; - } - /* Once reaching this point GPU must be - * switched to protected mode or vinstr - * re-enabled. */ /* No jobs running, so we can switch GPU mode right now */ - err = kbase_gpu_protected_mode_enter(kbdev); + if (kbase_jd_katom_is_secure(katom[idx])) { + err = kbase_gpu_secure_mode_enable(kbdev); + } else { + err = kbase_gpu_secure_mode_disable(kbdev); + } + if (err) { - /* - * Failed to switch into protected mode, resume - * vinstr core and fail atom. 
- */ - kbase_vinstr_resume(kbdev->vinstr_ctx); + /* Failed to switch secure mode, fail atom */ katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); /* Only return if head atom or previous atom @@ -968,18 +808,22 @@ void kbase_gpu_slot_update(struct kbase_device *kbdev) } } - /* Protected mode sanity checks */ + /* Secure mode sanity checks */ KBASE_DEBUG_ASSERT_MSG( - kbase_jd_katom_is_protected(katom[idx]) == kbase_gpu_in_protected_mode(kbdev), - "Protected mode of atom (%d) doesn't match protected mode of GPU (%d)", - kbase_jd_katom_is_protected(katom[idx]), kbase_gpu_in_protected_mode(kbdev)); + kbase_jd_katom_is_secure(katom[idx]) == kbase_gpu_in_secure_mode(kbdev), + "Secure mode of atom (%d) doesn't match secure mode of GPU (%d)", + kbase_jd_katom_is_secure(katom[idx]), kbase_gpu_in_secure_mode(kbdev)); + KBASE_DEBUG_ASSERT_MSG( + (kbase_jd_katom_is_secure(katom[idx]) && js == 0) || + !kbase_jd_katom_is_secure(katom[idx]), + "Secure atom on JS%d not supported", js); + katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ case KBASE_ATOM_GPU_RB_READY: - /* Only submit if head atom or previous atom * already submitted */ if (idx == 1 && @@ -1100,16 +944,8 @@ void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, } katom = kbase_gpu_dequeue_atom(kbdev, js, end_timestamp); + kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0); - kbase_tlstream_tl_nret_atom_lpu( - katom, - &kbdev->gpu_props.props.raw_props.js_features[ - katom->slot_nr]); - kbase_tlstream_tl_nret_atom_as(katom, &kbdev->as[kctx->as_nr]); - kbase_tlstream_tl_nret_ctx_lpu( - kctx, - &kbdev->gpu_props.props.raw_props.js_features[ - katom->slot_nr]); if (completion_code == BASE_JD_EVENT_STOPPED) { struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, @@ -1262,34 +1098,13 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) for (idx = 0; idx < 2; idx++) { struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); - bool keep_in_jm_rb = false; - if (!katom) - continue; - - if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) - keep_in_jm_rb = true; - - kbase_gpu_release_atom(kbdev, katom, NULL); - - /* - * If the atom wasn't on HW when the reset was issued - * then leave it in the RB and next time we're kicked - * it will be processed again from the starting state. - */ - if (keep_in_jm_rb) { - katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; - katom->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_CHECK; - continue; + if (katom) { + kbase_gpu_release_atom(kbdev, katom, NULL); + kbase_gpu_dequeue_atom(kbdev, js, NULL); + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + kbase_jm_complete(kbdev, katom, end_timestamp); } - - /* - * The atom was on the HW when the reset was issued - * all we can do is fail the atom. - */ - kbase_gpu_dequeue_atom(kbdev, js, NULL); - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - kbase_jm_complete(kbdev, katom, end_timestamp); } } } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c index d665420ab3805..6a49669af6302 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,7 +23,6 @@ #include #include "mali_kbase_js_affinity.h" -#include "mali_kbase_hw.h" #include @@ -115,14 +114,9 @@ bool kbase_js_choose_affinity(u64 * const affinity, if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == BASE_JD_REQ_T) { spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); - /* If the hardware supports XAFFINITY then we'll only enable - * the tiler (which is the default so this is a no-op), - * otherwise enable shader core 0. */ - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) - *affinity = 1; - else - *affinity = 0; - + /* Tiler only job, bit 0 needed to enable tiler but no shader + * cores required */ + *affinity = 1; return true; } @@ -178,12 +172,9 @@ bool kbase_js_choose_affinity(u64 * const affinity, if (*affinity == 0) return false; - /* Enable core 0 if tiler required for hardware without XAFFINITY - * support (notes above) */ - if (core_req & BASE_JD_REQ_T) { - if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) - *affinity = *affinity | 1; - } + /* Enable core 0 if tiler required */ + if (core_req & BASE_JD_REQ_T) + *affinity = *affinity | 1; return true; } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h index fbffa3b409621..3026e6a583034 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,6 +24,14 @@ #ifndef _KBASE_JS_AFFINITY_H_ #define _KBASE_JS_AFFINITY_H_ +#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS +/* Import the external affinity mask variables */ +extern u64 mali_js0_affinity_mask; +extern u64 mali_js1_affinity_mask; +extern u64 mali_js2_affinity_mask; +#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */ + + /** * kbase_js_can_run_job_on_slot_no_lock - Decide whether it is possible to * submit a job to a particular job slot in the current status diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c index a23deb4ca20cf..1e9a7e4c466da 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -138,17 +138,6 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) js_devdata->gpu_reset_ticks_ss; } - /* If timeouts have been changed then ensure - * that atom tick count is not greater than the - * new soft_stop timeout. This ensures that - * atoms do not miss any of the timeouts due to - * races between this worker and the thread - * changing the timeouts. 
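The kbase_js_choose_affinity() changes above drop the XAFFINITY feature test and return to the fixed rule that the tiler rides on shader core 0: a tiler-only job gets affinity 1, and any job that also uses the tiler has bit 0 OR-ed into whatever shader mask was chosen. A self-contained sketch of that rule; the boolean arguments are illustrative stand-ins for the BASE_JD_REQ_T and shader-job request bits the real function tests:

#include <stdbool.h>
#include <stdint.h>

static bool choose_affinity(uint64_t *affinity, uint64_t usable_cores,
                            bool needs_tiler, bool needs_shaders)
{
        if (needs_tiler && !needs_shaders) {
                *affinity = 1;          /* tiler-only: enable core 0 and nothing else */
                return true;
        }

        *affinity = usable_cores;       /* normally the powered-and-allowed core mask */
        if (*affinity == 0)
                return false;           /* no cores usable yet; try again later */

        if (needs_tiler)
                *affinity |= 1;         /* keep core 0 on so the tiler can run */
        return true;
}

As the added comment in the hunk notes, bit 0 is what enables the tiler on this driver generation even when no shader cores are otherwise required.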
*/ - if (backend->timeouts_updated && - ticks > soft_stop_ticks) - ticks = atom->sched_info.cfs.ticks = - soft_stop_ticks; - /* Job is Soft-Stoppable */ if (ticks == soft_stop_ticks) { int disjoint_threshold = @@ -268,8 +257,6 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), HRTIMER_MODE_REL); - backend->timeouts_updated = false; - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); return HRTIMER_NORESTART; @@ -348,10 +335,3 @@ void kbase_backend_timer_resume(struct kbase_device *kbdev) kbase_backend_ctx_count_changed(kbdev); } -void kbase_backend_timeouts_changed(struct kbase_device *kbdev) -{ - struct kbase_backend_data *backend = &kbdev->hwaccess.backend; - - backend->timeouts_updated = true; -} - diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c index 4a3572d971a6b..4fd13e2de63e3 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,9 +20,11 @@ #include #include #include +#if defined(CONFIG_MALI_MIPE_ENABLED) #include +#endif +#include #include -#include static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn, u32 num_pages) @@ -152,9 +154,6 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) AS_FAULTADDRESS_LO), kctx); - /* report the fault to debugfs */ - kbase_as_fault_debugfs_new(kbdev, as_no); - /* record the fault status */ as->fault_status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, @@ -166,15 +165,6 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) KBASE_MMU_FAULT_TYPE_BUS : KBASE_MMU_FAULT_TYPE_PAGE; -#ifdef CONFIG_MALI_GPU_MMU_AARCH64 - as->fault_extra_addr = kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_HI), - kctx); - as->fault_extra_addr <<= 32; - as->fault_extra_addr |= kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_LO), - kctx); -#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ if (kbase_as_has_bus_fault(as)) { /* Mark bus fault as handled. 
@@ -213,36 +203,10 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as, struct kbase_context *kctx) { struct kbase_mmu_setup *current_setup = &as->current_setup; +#ifdef CONFIG_MALI_MIPE_ENABLED u32 transcfg = 0; +#endif -#ifdef CONFIG_MALI_GPU_MMU_AARCH64 - transcfg = current_setup->transcfg & 0xFFFFFFFFUL; - - /* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */ - /* Clear PTW_MEMATTR bits */ - transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK; - /* Enable correct PTW_MEMATTR bits */ - transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK; - - if (kbdev->system_coherency == COHERENCY_ACE) { - /* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */ - /* Clear PTW_SH bits */ - transcfg = (transcfg & ~AS_TRANSCFG_PTW_SH_MASK); - /* Enable correct PTW_SH bits */ - transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS); - } - - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO), - transcfg, kctx); - kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI), - (current_setup->transcfg >> 32) & 0xFFFFFFFFUL, kctx); - -#else /* CONFIG_MALI_GPU_MMU_AARCH64 */ - - if (kbdev->system_coherency == COHERENCY_ACE) - current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER; - -#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO), current_setup->transtab & 0xFFFFFFFFUL, kctx); @@ -254,10 +218,12 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as, kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI), (current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx); +#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_attrib_as_config(as, current_setup->transtab, current_setup->memattr, transcfg); +#endif write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx); } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c index 711e44c7f80ad..947a7ed285d6a 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -168,7 +168,6 @@ bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend) /* Force all cores off */ kbdev->pm.backend.desired_shader_state = 0; - kbdev->pm.backend.desired_tiler_state = 0; /* Force all cores to be unavailable, in the situation where * transitions are in progress for some cores but not others, diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c index f891fa225a89f..487391168e25a 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. 
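Both MMU hunks above rely on the usual pattern of accessing 64-bit MMU state through 32-bit LO/HI register pairs: fault addresses are read as HI shifted up and OR-ed with LO, and AS_TRANSTAB / AS_MEMATTR are written back the same way. A minimal self-contained sketch of that split; reg_read/reg_write stand in for kbase_reg_read()/kbase_reg_write(), and the register indices are illustrative rather than the driver's real MMU_AS_REG() offsets:

#include <stdint.h>

/* Read a 64-bit value (e.g. AS_FAULTADDRESS) from a HI/LO register pair. */
static uint64_t read_reg64(uint32_t (*reg_read)(int reg), int lo, int hi)
{
        uint64_t val = reg_read(hi);

        val <<= 32;
        val |= reg_read(lo);
        return val;
}

/* Write a 64-bit value (e.g. AS_TRANSTAB or AS_MEMATTR) to a LO/HI pair. */
static void write_reg64(void (*reg_write)(int reg, uint32_t val),
                        int lo, int hi, uint64_t val)
{
        reg_write(lo, val & 0xFFFFFFFFUL);
        reg_write(hi, (val >> 32) & 0xFFFFFFFFUL);
}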
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,8 +35,7 @@ static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev) static bool coarse_demand_get_core_active(struct kbase_device *kbdev) { if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap | - kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt - && !kbdev->tiler_inuse_cnt) + kbdev->shader_inuse_bitmap)) return false; return true; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h index e8f96fe6c5146..60e40915869c8 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -192,14 +192,12 @@ union kbase_pm_ca_policy_data { * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is * powered off * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders - * and/or timers are powered off + * are powered off * @gpu_poweroff_timer: Timer for powering off GPU * @gpu_poweroff_wq: Workqueue to power off GPU on when timer fires * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq * @shader_poweroff_pending: Bit mask of shaders to be powered off on next * timer callback - * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer - * callback * @poweroff_timer_needed: true if the poweroff timer is currently required, * false otherwise * @poweroff_timer_running: true if the poweroff timer is currently running, @@ -221,6 +219,9 @@ union kbase_pm_ca_policy_data { * &struct kbase_pm_callback_conf * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See * &struct kbase_pm_callback_conf + * @callback_cci_snoop_ctrl: Callback when the GPU L2 power may transition. + * If enable is set then snoops should be enabled + * otherwise snoops should be disabled * * Note: * During an IRQ, @ca_current_policy or @pm_current_policy can be NULL when the @@ -276,7 +277,6 @@ struct kbase_pm_backend_data { struct work_struct gpu_poweroff_work; u64 shader_poweroff_pending; - u64 tiler_poweroff_pending; bool poweroff_timer_needed; bool poweroff_timer_running; @@ -288,6 +288,7 @@ struct kbase_pm_backend_data { int (*callback_power_runtime_on)(struct kbase_device *kbdev); void (*callback_power_runtime_off)(struct kbase_device *kbdev); int (*callback_power_runtime_idle)(struct kbase_device *kbdev); + }; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c index 81322fd0dd175..9dac2303bd00a 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. 
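The coarse-demand hunk above, like the identical demand-policy hunk that follows, reverts get_core_active() to look only at the context count and the shader bitmaps, so the tiler need/in-use counters no longer keep the GPU powered by themselves. A self-contained sketch of the resulting test:

#include <stdbool.h>
#include <stdint.h>

static bool gpu_core_active(int active_count,
                            uint64_t shader_needed_bitmap,
                            uint64_t shader_inuse_bitmap)
{
        if (active_count == 0 &&
            !(shader_needed_bitmap | shader_inuse_bitmap))
                return false;   /* nothing needs the GPU: allow power-down */
        return true;
}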
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -37,8 +37,7 @@ static u64 demand_get_core_mask(struct kbase_device *kbdev) static bool demand_get_core_active(struct kbase_device *kbdev) { if (0 == kbdev->pm.active_count && !(kbdev->shader_needed_bitmap | - kbdev->shader_inuse_bitmap) && !kbdev->tiler_needed_cnt - && !kbdev->tiler_inuse_cnt) + kbdev->shader_inuse_bitmap)) return false; return true; diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c index 03ba23d543651..5c1388448d284 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,8 +30,11 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #include #endif +#if defined(CONFIG_MALI_MIPE_ENABLED) #include +#endif #include +#include #include #include #include @@ -119,39 +122,6 @@ static u32 core_type_to_reg(enum kbase_pm_core_type core_type, return (u32)core_type + (u32)action; } -#ifdef CONFIG_ARM64 -static void mali_cci_flush_l2(struct kbase_device *kbdev) -{ - const u32 mask = CLEAN_CACHES_COMPLETED | RESET_COMPLETED; - u32 loops = KBASE_CLEAN_CACHE_MAX_LOOPS; - u32 raw; - - /* - * Note that we don't take the cache flush mutex here since - * we expect to be the last user of the L2, all other L2 users - * would have dropped their references, to initiate L2 power - * down, L2 power down being the only valid place for this - * to be called from. - */ - - kbase_reg_write(kbdev, - GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_CLEAN_INV_CACHES, - NULL); - - raw = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), - NULL); - - /* Wait for cache flush to complete before continuing, exit on - * gpu resets or loop expiry. */ - while (((raw & mask) == 0) && --loops) { - raw = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), - NULL); - } -} -#endif /** * kbase_pm_invoke - Invokes an action on a core set @@ -206,7 +176,7 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, kbase_trace_mali_pm_power_off(core_type, cores); } #endif - +#if defined(CONFIG_MALI_MIPE_ENABLED) if (cores) { u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY); @@ -216,7 +186,7 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, state &= ~cores; kbase_tlstream_aux_pm_state(core_type, state); } - +#endif /* Tracing */ if (cores) { if (action == ACTION_PWRON) @@ -249,8 +219,6 @@ static void kbase_pm_invoke(struct kbase_device *kbdev, case KBASE_PM_CORE_L2: KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL, 0u, lo); - /* disable snoops before L2 is turned off */ - kbase_pm_cache_snoop_disable(kbdev); break; default: break; @@ -478,12 +446,6 @@ static bool kbase_pm_transition_core_type(struct kbase_device *kbdev, /* All are ready, none will be turned off, and none are * transitioning */ kbdev->pm.backend.l2_powered = 1; - /* - * Ensure snoops are enabled after L2 is powered up, - * note that kbase keeps track of the snoop state, so - * safe to repeatedly call. 
- */ - kbase_pm_cache_snoop_enable(kbdev); if (kbdev->l2_users_count > 0) { /* Notify any registered l2 cache users * (optimized out when no users waiting) */ @@ -551,12 +513,10 @@ KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type); * @present: The bit mask of present caches * @cores_powered: A bit mask of cores (or L2 caches) that are desired to * be powered - * @tilers_powered: The bit mask of tilers that are desired to be powered * * Return: A bit mask of the caches that should be turned on */ -static u64 get_desired_cache_status(u64 present, u64 cores_powered, - u64 tilers_powered) +static u64 get_desired_cache_status(u64 present, u64 cores_powered) { u64 desired = 0; @@ -579,10 +539,6 @@ static u64 get_desired_cache_status(u64 present, u64 cores_powered, present &= ~bit_mask; } - /* Power up the required L2(s) for the tiler */ - if (tilers_powered) - desired |= 1; - return desired; } @@ -595,7 +551,6 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) bool in_desired_state = true; u64 desired_l2_state; u64 cores_powered; - u64 tilers_powered; u64 tiler_available_bitmap; u64 shader_available_bitmap; u64 shader_ready_bitmap; @@ -629,10 +584,6 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) cores_powered |= kbdev->pm.backend.desired_shader_state; - /* Work out which tilers want to be powered */ - tilers_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER); - tilers_powered |= kbdev->pm.backend.desired_tiler_state; - /* If there are l2 cache users registered, keep all l2s powered even if * all other cores are off. */ if (kbdev->l2_users_count > 0) @@ -640,11 +591,17 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) desired_l2_state = get_desired_cache_status( kbdev->gpu_props.props.raw_props.l2_present, - cores_powered, tilers_powered); + cores_powered); /* If any l2 cache is on, then enable l2 #0, for use by job manager */ - if (0 != desired_l2_state) + if (0 != desired_l2_state) { desired_l2_state |= 1; + /* Also enable tiler if l2 cache is powered */ + kbdev->pm.backend.desired_tiler_state = + kbdev->gpu_props.props.raw_props.tiler_present; + } else { + kbdev->pm.backend.desired_tiler_state = 0; + } prev_l2_available_bitmap = kbdev->l2_available_bitmap; in_desired_state &= kbase_pm_transition_core_type(kbdev, @@ -750,7 +707,7 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER)); #endif - +#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_aux_pm_state( KBASE_PM_CORE_L2, kbase_pm_get_ready_cores( @@ -764,6 +721,7 @@ MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) kbase_pm_get_ready_cores( kbdev, KBASE_PM_CORE_TILER)); +#endif KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL, kbdev->pm.backend.gpu_in_desired_state, @@ -1060,7 +1018,6 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend) return false; } - kbase_pm_cache_snoop_disable(kbdev); /* The GPU power may be turned off from this point */ kbdev->pm.backend.gpu_powered = false; @@ -1143,20 +1100,18 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327)) kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD; -#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY /* Enable alternative hardware counter selection if configured. */ - if (!GPU_ID_IS_NEW_FORMAT(prod_id)) + if (DEFAULT_ALTERNATIVE_HWC) kbdev->hw_quirks_sc |= SC_ALT_COUNTERS; -#endif /* Needed due to MIDBASE-2795. 
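The kbase_pm_check_transitions_nolock() hunk above removes the separate desired-tiler tracking: the desired L2 mask is computed from the powered shader cores alone, and whenever any L2 is desired, L2 #0 is forced on for the job manager and the whole tiler_present mask is powered along with it. A self-contained sketch of that coupling, with desired_caches() standing in for get_desired_cache_status():

#include <stdint.h>

static void compute_desired_l2_and_tiler(uint64_t l2_present, uint64_t tiler_present,
                                         uint64_t cores_powered,
                                         uint64_t (*desired_caches)(uint64_t present,
                                                                    uint64_t cores),
                                         uint64_t *desired_l2, uint64_t *desired_tiler)
{
        *desired_l2 = desired_caches(l2_present, cores_powered);

        if (*desired_l2 != 0) {
                *desired_l2 |= 1;               /* L2 #0 for the job manager */
                *desired_tiler = tiler_present; /* tiler simply follows the L2 */
        } else {
                *desired_tiler = 0;
        }
}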
ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797)) kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS; if (!kbase_hw_has_issue(kbdev, GPUCORE_1619)) { - if (prod_id < 0x750 || prod_id == 0x6956) /* T60x, T62x, T72x */ + if (prod_id < 0x760 || prod_id == 0x6956) /* T60x, T62x, T72x */ kbdev->hw_quirks_sc |= SC_LS_ATTR_CHECK_DISABLE; - else if (prod_id >= 0x750 && prod_id <= 0x880) /* T76x, T8xx */ + else if (prod_id >= 0x760 && prod_id <= 0x880) /* T76x, T8xx */ kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES; } @@ -1181,12 +1136,6 @@ static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev) kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT; - if (kbdev->system_coherency == COHERENCY_ACE) { - /* Allow memory configuration disparity to be ignored, we - * optimize the use of shared memory and thus we expect - * some disparity in the memory configuration */ - kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY; - } /* Only for T86x/T88x-based products after r2p0 */ if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) { @@ -1251,42 +1200,51 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) } -void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev) -{ - if ((kbdev->system_coherency == COHERENCY_ACE) && - !kbdev->cci_snoop_enabled) { -#ifdef CONFIG_ARM64 - if (kbdev->snoop_enable_smc != 0) - kbase_invoke_smc_fid(kbdev->snoop_enable_smc, 0, 0, 0); -#endif /* CONFIG_ARM64 */ - dev_dbg(kbdev->dev, "MALI - CCI Snoops - Enabled\n"); - kbdev->cci_snoop_enabled = true; - } -} -void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev) +int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) { - if ((kbdev->system_coherency == COHERENCY_ACE) && - kbdev->cci_snoop_enabled) { -#ifdef CONFIG_ARM64 - if (kbdev->snoop_disable_smc != 0) { - mali_cci_flush_l2(kbdev); - kbase_invoke_smc_fid(kbdev->snoop_disable_smc, 0, 0, 0); - } -#endif /* CONFIG_ARM64 */ - dev_dbg(kbdev->dev, "MALI - CCI Snoops Disabled\n"); - kbdev->cci_snoop_enabled = false; + unsigned long irq_flags; + struct kbasep_reset_timeout_data rtdata; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&kbdev->pm.lock); + + /* Ensure the clock is on before attempting to access the hardware */ + if (!kbdev->pm.backend.gpu_powered) { + if (kbdev->pm.backend.callback_power_on) + kbdev->pm.backend.callback_power_on(kbdev); + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, + irq_flags); + kbdev->pm.backend.gpu_powered = true; + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + irq_flags); } -} -static int kbase_pm_reset_do_normal(struct kbase_device *kbdev) -{ - struct kbasep_reset_timeout_data rtdata; + /* Ensure interrupts are off to begin with, this also clears any + * outstanding interrupts */ + kbase_pm_disable_interrupts(kbdev); + /* Prepare for the soft-reset */ + kbdev->pm.backend.reset_done = false; - KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0); + /* The cores should be made unavailable due to the reset */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, irq_flags); + if (kbdev->shader_available_bitmap != 0u) + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, + NULL, 0u, (u32)0u); + if (kbdev->tiler_available_bitmap != 0u) + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, + NULL, NULL, 0u, (u32)0u); + kbdev->shader_available_bitmap = 0u; + kbdev->tiler_available_bitmap = 0u; + kbdev->l2_available_bitmap = 0u; + 
spin_unlock_irqrestore(&kbdev->pm.power_change_lock, irq_flags); + /* Soft reset the GPU */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0); +#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_jd_gpu_soft_reset(kbdev); - +#endif kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_SOFT_RESET, NULL); @@ -1312,7 +1270,7 @@ static int kbase_pm_reset_do_normal(struct kbase_device *kbdev) /* GPU has been reset */ hrtimer_cancel(&rtdata.timer); destroy_hrtimer_on_stack(&rtdata.timer); - return 0; + goto out; } /* No interrupt has been received - check if the RAWSTAT register says @@ -1348,7 +1306,7 @@ static int kbase_pm_reset_do_normal(struct kbase_device *kbdev) /* GPU has been reset */ hrtimer_cancel(&rtdata.timer); destroy_hrtimer_on_stack(&rtdata.timer); - return 0; + goto out; } destroy_hrtimer_on_stack(&rtdata.timer); @@ -1356,90 +1314,16 @@ static int kbase_pm_reset_do_normal(struct kbase_device *kbdev) dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n", RESET_TIMEOUT); + /* The GPU still hasn't reset, give up */ return -EINVAL; -} - -static int kbase_pm_reset_do_protected(struct kbase_device *kbdev) -{ - KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0); - kbase_tlstream_jd_gpu_soft_reset(kbdev); - - return kbdev->protected_ops->protected_mode_reset(kbdev); -} - -int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) -{ - unsigned long irq_flags; - int err; - bool resume_vinstr = false; - - KBASE_DEBUG_ASSERT(NULL != kbdev); - lockdep_assert_held(&kbdev->pm.lock); - - /* Ensure the clock is on before attempting to access the hardware */ - if (!kbdev->pm.backend.gpu_powered) { - if (kbdev->pm.backend.callback_power_on) - kbdev->pm.backend.callback_power_on(kbdev); - - spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, - irq_flags); - kbdev->pm.backend.gpu_powered = true; - spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, - irq_flags); - } - - /* Ensure interrupts are off to begin with, this also clears any - * outstanding interrupts */ - kbase_pm_disable_interrupts(kbdev); - /* Ensure cache snoops are disabled before reset. 
*/ - kbase_pm_cache_snoop_disable(kbdev); - /* Prepare for the soft-reset */ - kbdev->pm.backend.reset_done = false; - - /* The cores should be made unavailable due to the reset */ - spin_lock_irqsave(&kbdev->pm.power_change_lock, irq_flags); - if (kbdev->shader_available_bitmap != 0u) - KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, - NULL, 0u, (u32)0u); - if (kbdev->tiler_available_bitmap != 0u) - KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, - NULL, NULL, 0u, (u32)0u); - kbdev->shader_available_bitmap = 0u; - kbdev->tiler_available_bitmap = 0u; - kbdev->l2_available_bitmap = 0u; - spin_unlock_irqrestore(&kbdev->pm.power_change_lock, irq_flags); - /* Soft reset the GPU */ - if (kbdev->protected_mode_support && - kbdev->protected_ops->protected_mode_reset) - err = kbase_pm_reset_do_protected(kbdev); - else - err = kbase_pm_reset_do_normal(kbdev); - - spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, irq_flags); - if (kbdev->protected_mode) - resume_vinstr = true; - kbdev->protected_mode_transition = false; - kbdev->protected_mode = false; - spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, irq_flags); - - if (err) - goto exit; +out: if (flags & PM_HW_ISSUES_DETECT) kbase_pm_hw_issues_detect(kbdev); kbase_pm_hw_issues_apply(kbdev); - kbase_cache_set_coherency_mode(kbdev, kbdev->system_coherency); - - /* Sanity check protected mode was left after reset */ - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { - u32 gpu_status = kbase_reg_read(kbdev, - GPU_CONTROL_REG(GPU_STATUS), NULL); - - WARN_ON(gpu_status & GPU_STATUS_PROTECTED_MODE_ACTIVE); - } /* If cycle counter was in use re-enable it, enable_irqs will only be * false when called from kbase_pm_powerup */ @@ -1467,12 +1351,7 @@ int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) if (flags & PM_ENABLE_IRQS) kbase_pm_enable_interrupts(kbdev); -exit: - /* If GPU is leaving protected mode resume vinstr operation. */ - if (kbdev->vinstr_ctx && resume_vinstr) - kbase_vinstr_resume(kbdev->vinstr_ctx); - - return err; + return 0; } /** diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h index aa51b8cdef8fc..943eda567cb5e 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -501,23 +501,5 @@ void kbase_pm_power_changed(struct kbase_device *kbdev); void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *now); -/** - * kbase_pm_cache_snoop_enable - Allow CPU snoops on the GPU - * If the GPU does not have coherency this is a no-op - * @kbdev: Device pointer - * - * This function should be called after L2 power up. - */ - -void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev); - -/** - * kbase_pm_cache_snoop_disable - Prevent CPU snoops on the GPU - * If the GPU does not have coherency this is a no-op - * @kbdev: Device pointer - * - * This function should be called before L2 power off. 
- */ -void kbase_pm_cache_snoop_disable(struct kbase_device *kbdev); #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c index 4d006028089a1..343436fc353dd 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -154,22 +155,16 @@ static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev, static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev) { u64 prev_shader_state = kbdev->pm.backend.desired_shader_state; - u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state; lockdep_assert_held(&kbdev->pm.power_change_lock); kbdev->pm.backend.desired_shader_state &= ~kbdev->pm.backend.shader_poweroff_pending; - kbdev->pm.backend.desired_tiler_state &= - ~kbdev->pm.backend.tiler_poweroff_pending; kbdev->pm.backend.shader_poweroff_pending = 0; - kbdev->pm.backend.tiler_poweroff_pending = 0; - if (prev_shader_state != kbdev->pm.backend.desired_shader_state || - prev_tiler_state != - kbdev->pm.backend.desired_tiler_state || - kbdev->pm.backend.ca_in_transition) { + if (prev_shader_state != kbdev->pm.backend.desired_shader_state + || kbdev->pm.backend.ca_in_transition) { bool cores_are_available; KBASE_TIMELINE_PM_CHECKTRANS(kbdev, @@ -207,8 +202,7 @@ kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) queue_work(kbdev->pm.backend.gpu_poweroff_wq, &kbdev->pm.backend.gpu_poweroff_work); - if (kbdev->pm.backend.shader_poweroff_pending || - kbdev->pm.backend.tiler_poweroff_pending) { + if (kbdev->pm.backend.shader_poweroff_pending) { kbdev->pm.backend.shader_poweroff_pending_time--; KBASE_DEBUG_ASSERT( @@ -333,7 +327,6 @@ void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev) kbdev->pm.backend.gpu_poweroff_pending = 0; kbdev->pm.backend.shader_poweroff_pending = 0; - kbdev->pm.backend.tiler_poweroff_pending = 0; kbdev->pm.backend.shader_poweroff_pending_time = 0; spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); @@ -388,10 +381,8 @@ void kbase_pm_update_active(struct kbase_device *kbdev) * when there are contexts active */ KBASE_DEBUG_ASSERT(pm->active_count == 0); - if (backend->shader_poweroff_pending || - backend->tiler_poweroff_pending) { + if (backend->shader_poweroff_pending) { backend->shader_poweroff_pending = 0; - backend->tiler_poweroff_pending = 0; backend->shader_poweroff_pending_time = 0; } @@ -450,7 +441,6 @@ void kbase_pm_update_active(struct kbase_device *kbdev) void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) { u64 desired_bitmap; - u64 desired_tiler_bitmap; bool cores_are_available; bool do_poweroff = false; @@ -463,37 +453,23 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev); desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev); + /* Enable core 0 if tiler required, regardless of core availability */ if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0) - desired_tiler_bitmap = 1; - else - desired_tiler_bitmap = 0; - - if (!kbase_hw_has_feature(kbdev, 
BASE_HW_FEATURE_XAFFINITY)) { - /* Unless XAFFINITY is supported, enable core 0 if tiler - * required, regardless of core availability */ - if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0) - desired_bitmap |= 1; - } + desired_bitmap |= 1; if (kbdev->pm.backend.desired_shader_state != desired_bitmap) KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u, (u32)desired_bitmap); /* Are any cores being powered on? */ if (~kbdev->pm.backend.desired_shader_state & desired_bitmap || - ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap || kbdev->pm.backend.ca_in_transition) { /* Check if we are powering off any cores before updating shader * state */ - if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap || - kbdev->pm.backend.desired_tiler_state & - ~desired_tiler_bitmap) { + if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap) { /* Start timer to power off cores */ kbdev->pm.backend.shader_poweroff_pending |= (kbdev->pm.backend.desired_shader_state & ~desired_bitmap); - kbdev->pm.backend.tiler_poweroff_pending |= - (kbdev->pm.backend.desired_tiler_state & - ~desired_tiler_bitmap); if (kbdev->pm.poweroff_shader_ticks) kbdev->pm.backend.shader_poweroff_pending_time = @@ -503,28 +479,21 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) } kbdev->pm.backend.desired_shader_state = desired_bitmap; - kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap; /* If any cores are being powered on, transition immediately */ cores_are_available = kbase_pm_check_transitions_nolock(kbdev); - } else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap || - kbdev->pm.backend.desired_tiler_state & - ~desired_tiler_bitmap) { + } else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap) { /* Start timer to power off cores */ kbdev->pm.backend.shader_poweroff_pending |= (kbdev->pm.backend.desired_shader_state & ~desired_bitmap); - kbdev->pm.backend.tiler_poweroff_pending |= - (kbdev->pm.backend.desired_tiler_state & - ~desired_tiler_bitmap); if (kbdev->pm.poweroff_shader_ticks) kbdev->pm.backend.shader_poweroff_pending_time = kbdev->pm.poweroff_shader_ticks; else kbasep_pm_do_poweroff_cores(kbdev); } else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 && - desired_tiler_bitmap != 0 && - kbdev->pm.backend.poweroff_timer_needed) { + kbdev->pm.backend.poweroff_timer_needed) { /* If power policy is keeping cores on despite there being no * active contexts then disable poweroff timer as it isn't * required. 
@@ -535,17 +504,11 @@ void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) /* Ensure timer does not power off wanted cores and make sure to power * off unwanted cores */ - if (kbdev->pm.backend.shader_poweroff_pending || - kbdev->pm.backend.tiler_poweroff_pending) { + if (kbdev->pm.backend.shader_poweroff_pending != 0) { kbdev->pm.backend.shader_poweroff_pending &= ~(kbdev->pm.backend.desired_shader_state & desired_bitmap); - kbdev->pm.backend.tiler_poweroff_pending &= - ~(kbdev->pm.backend.desired_tiler_state & - desired_tiler_bitmap); - - if (!kbdev->pm.backend.shader_poweroff_pending && - !kbdev->pm.backend.tiler_poweroff_pending) + if (kbdev->pm.backend.shader_poweroff_pending == 0) kbdev->pm.backend.shader_poweroff_pending_time = 0; } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c index d965033905ca6..9d3eb10bd3c9d 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,12 +36,7 @@ static struct thermal_zone_device *gpu_tz; static unsigned long model_static_power(unsigned long voltage) { -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) - unsigned long temperature; -#else - int temperature; -#endif - unsigned long temp; + int temperature, temp; unsigned long temp_squared, temp_cubed, temp_scaling_factor; const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10; @@ -90,11 +85,7 @@ static unsigned long model_dynamic_power(unsigned long freq, return (dynamic_coefficient * v2 * f_mhz) / 1000000; /* mW */ } -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) -struct devfreq_cooling_ops power_model_simple_ops = { -#else struct devfreq_cooling_power power_model_simple_ops = { -#endif .get_static_power = model_static_power, .get_dynamic_power = model_dynamic_power, }; @@ -159,7 +150,7 @@ int kbase_power_model_simple_init(struct kbase_device *kbdev) dynamic_coefficient = (((dynamic_power * 1000) / voltage_squared) * 1000) / frequency; - if (of_property_read_u32_array(power_model_node, "ts", (u32 *)ts, 4)) { + if (of_property_read_u32_array(power_model_node, "ts", ts, 4)) { dev_err(kbdev->dev, "ts in power_model not available\n"); return -EINVAL; } diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h index 9b5e69a9323ba..17eede4d917c0 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_power_model_simple.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
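The power_model_simple changes above drop the kernel-version guards but keep the model itself: dynamic power is the dynamic coefficient times voltage squared times frequency, using the driver's millivolt/MHz/milliwatt conventions. A self-contained sketch of that calculation; the scaling steps are written out for illustration and may differ in detail from the driver's, and the coefficient is whatever kbase_power_model_simple_init() derived from the device tree:

#include <stdint.h>

static unsigned long dynamic_power_mw(unsigned long coefficient,
                                      unsigned long voltage_mv,
                                      unsigned long freq_hz)
{
        const unsigned long v2 = (voltage_mv * voltage_mv) / 1000;  /* scaled mV^2 */
        const unsigned long f_mhz = freq_hz / 1000000;              /* Hz -> MHz */

        return (coefficient * v2 * f_mhz) / 1000000;                /* mW */
}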
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -38,10 +38,6 @@ */ int kbase_power_model_simple_init(struct kbase_device *kbdev); -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) -extern struct devfreq_cooling_ops power_model_simple_ops; -#else extern struct devfreq_cooling_power power_model_simple_ops; -#endif #endif /* _BASE_POWER_MODEL_SIMPLE_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c index d992989123e8f..4bcde85f3ee13 100644 --- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -74,10 +74,9 @@ void kbase_wait_write_flush(struct kbase_context *kctx) { u32 base_count = 0; - /* - * The caller must be holding onto the kctx or the call is from - * userspace. - */ + /* A suspend won't happen here, because we're in a syscall from a + * userspace thread */ + kbase_pm_context_active(kctx->kbdev); kbase_pm_request_gpu_cycle_counter(kctx->kbdev); diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h index f7c0ff6749060..2102f43348cb2 100644 --- a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,7 +27,6 @@ enum base_hw_feature { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, BASE_HW_FEATURE_33BIT_VA, - BASE_HW_FEATURE_XAFFINITY, BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, BASE_HW_FEATURE_MRT, BASE_HW_FEATURE_BRNDOUT_CC, @@ -47,9 +46,6 @@ enum base_hw_feature { BASE_HW_FEATURE_BRNDOUT_KILL, BASE_HW_FEATURE_WARPING, BASE_HW_FEATURE_V4, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, - BASE_HW_FEATURE_COHERENCY_REG, BASE_HW_FEATURE_END }; @@ -88,7 +84,6 @@ static const enum base_hw_feature base_hw_features_t72x[] = { static const enum base_hw_feature base_hw_features_t76x[] = { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, BASE_HW_FEATURE_BRNDOUT_CC, @@ -106,7 +101,6 @@ static const enum base_hw_feature base_hw_features_t76x[] = { static const enum base_hw_feature base_hw_features_tFxx[] = { BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, BASE_HW_FEATURE_BRNDOUT_CC, @@ -127,7 +121,6 @@ static const enum base_hw_feature base_hw_features_t83x[] = { BASE_HW_FEATURE_33BIT_VA, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, BASE_HW_FEATURE_WARPING, BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, @@ -149,7 +142,6 @@ static const enum base_hw_feature base_hw_features_t82x[] = { BASE_HW_FEATURE_33BIT_VA, BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, BASE_HW_FEATURE_WARPING, BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, @@ -167,31 +159,5 @@ static const enum base_hw_feature base_hw_features_t82x[] = { BASE_HW_FEATURE_END }; -static const enum base_hw_feature base_hw_features_tMIx[] = { - BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, - BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, - BASE_HW_FEATURE_XAFFINITY, - BASE_HW_FEATURE_WARPING, - BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, - BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, - BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, - BASE_HW_FEATURE_BRNDOUT_CC, - BASE_HW_FEATURE_BRNDOUT_KILL, - BASE_HW_FEATURE_LD_ST_LEA_TEX, - BASE_HW_FEATURE_LD_ST_TILEBUFFER, - BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, - BASE_HW_FEATURE_MRT, - BASE_HW_FEATURE_MSAA_16X, - BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, - BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, - BASE_HW_FEATURE_T7XX_PAIRING_RULES, - BASE_HW_FEATURE_TEST4_DATUM_MODE, - BASE_HW_FEATURE_FLUSH_REDUCTION, - BASE_HW_FEATURE_PROTECTED_MODE, - BASE_HW_FEATURE_COHERENCY_REG, - BASE_HW_FEATURE_END -}; - - #endif /* _BASE_HWCONFIG_FEATURES_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h index 149f44cb8674a..66c2dc76fdb36 100644 --- a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. 
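The feature tables above, and the issue tables that follow, are plain END-terminated lists; the driver folds the table selected for the detected GPU into a per-device mask so that kbase_hw_has_feature() and kbase_hw_has_issue() reduce to a single bit test. A self-contained sketch of that fold, using a flat flag array instead of the kernel's bitmap helpers; the 256-entry bound and the function names are illustrative only:

#include <stdbool.h>

#define HW_FLAG_MAX 256

/* Fold an END-terminated table (e.g. base_hw_features_t76x) into a flag array. */
static void set_hw_flags(const int *table, int end_marker, bool flags[HW_FLAG_MAX])
{
        for (; *table != end_marker; table++)
                if (*table >= 0 && *table < HW_FLAG_MAX)
                        flags[*table] = true;
}

/* Equivalent of kbase_hw_has_feature()/kbase_hw_has_issue() on that array. */
static bool has_hw_flag(const bool flags[HW_FLAG_MAX], int flag)
{
        return flags[flag];
}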
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -71,7 +71,6 @@ enum base_hw_issue { BASE_HW_ISSUE_10487, BASE_HW_ISSUE_10607, BASE_HW_ISSUE_10632, - BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10676, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_10684, @@ -91,7 +90,6 @@ enum base_hw_issue { BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -102,17 +100,7 @@ enum base_hw_issue { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_7940, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TMIX_8138, - BASE_HW_ISSUE_TMIX_8206, - BASE_HW_ISSUE_TMIX_8343, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -165,7 +153,6 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = { BASE_HW_ISSUE_10487, BASE_HW_ISSUE_10607, BASE_HW_ISSUE_10632, - BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10676, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_10684, @@ -179,9 +166,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = { BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_3964, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -207,7 +192,6 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = { BASE_HW_ISSUE_10487, BASE_HW_ISSUE_10607, BASE_HW_ISSUE_10632, - BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10676, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_10684, @@ -219,9 +203,7 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = { BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, - BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -244,7 +226,6 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = { BASE_HW_ISSUE_10487, BASE_HW_ISSUE_10607, BASE_HW_ISSUE_10632, - BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10676, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_10684, @@ -255,10 +236,8 @@ static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = { BASE_HW_ISSUE_11020, BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -273,7 +252,6 @@ static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = { BASE_HW_ISSUE_10487, BASE_HW_ISSUE_10607, BASE_HW_ISSUE_10632, - BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10676, BASE_HW_ISSUE_10682, BASE_HW_ISSUE_10684, @@ -289,7 +267,6 @@ static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = { BASE_HW_ISSUE_11035, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_END @@ -300,7 +277,6 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = { BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, BASE_HW_ISSUE_10472, - BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10684, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -312,10 +288,8 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = { BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -324,7 +298,6 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = { BASE_HW_ISSUE_9435, 
BASE_HW_ISSUE_10471, BASE_HW_ISSUE_10472, - BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10684, BASE_HW_ISSUE_10821, BASE_HW_ISSUE_10883, @@ -334,7 +307,6 @@ static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = { BASE_HW_ISSUE_11012, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_END @@ -349,7 +321,6 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = { BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -360,11 +331,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -377,7 +344,6 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = { BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -388,11 +354,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -403,7 +365,6 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = { BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -414,11 +375,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -431,7 +388,6 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = { BASE_HW_ISSUE_11024, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -442,11 +398,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -457,7 +409,6 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = { BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_26, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, @@ -468,11 +419,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -483,7 +430,6 @@ static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = { BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -491,11 +437,7 @@ static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, 
BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -503,7 +445,6 @@ static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = { BASE_HW_ISSUE_6402, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, - BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10684, BASE_HW_ISSUE_10797, BASE_HW_ISSUE_10821, @@ -511,10 +452,8 @@ static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = { BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -522,7 +461,6 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = { BASE_HW_ISSUE_6402, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, - BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10684, BASE_HW_ISSUE_10797, BASE_HW_ISSUE_10821, @@ -530,10 +468,8 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = { BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -541,7 +477,6 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = { BASE_HW_ISSUE_6402, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, - BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10684, BASE_HW_ISSUE_10797, BASE_HW_ISSUE_10821, @@ -549,10 +484,8 @@ static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = { BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_END }; @@ -561,13 +494,11 @@ static const enum base_hw_issue base_hw_issues_model_t72x[] = { BASE_HW_ISSUE_6402, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10471, - BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10797, BASE_HW_ISSUE_11042, BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3964, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -584,10 +515,6 @@ static const enum base_hw_issue base_hw_issues_model_t76x[] = { BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -598,7 +525,6 @@ static const enum base_hw_issue base_hw_issues_model_t60x[] = { BASE_HW_ISSUE_8778, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10472, - BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10931, BASE_HW_ISSUE_11012, BASE_HW_ISSUE_11020, @@ -606,7 +532,6 @@ static const enum base_hw_issue base_hw_issues_model_t60x[] = { BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3964, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -616,7 +541,6 @@ static const enum base_hw_issue base_hw_issues_model_t62x[] = { BASE_HW_ISSUE_6402, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_10472, - BASE_HW_ISSUE_10649, BASE_HW_ISSUE_10931, BASE_HW_ISSUE_11012, BASE_HW_ISSUE_11020, @@ -625,7 +549,6 @@ static const enum base_hw_issue base_hw_issues_model_t62x[] = { BASE_HW_ISSUE_11051, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, - BASE_HW_ISSUE_T76X_3964, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -636,7 +559,6 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -644,11 +566,7 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = { 
BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -658,18 +576,13 @@ static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -679,16 +592,12 @@ static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -698,16 +607,12 @@ static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -719,10 +624,6 @@ static const enum base_hw_issue base_hw_issues_model_tFRx[] = { BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -733,18 +634,13 @@ static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3964, BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -754,16 +650,12 @@ static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -773,16 +665,12 @@ static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3966, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -794,9 +682,6 @@ static const enum base_hw_issue base_hw_issues_model_t86x[] = { BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_T76X_3982, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -807,7 +692,6 @@ static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = { BASE_HW_ISSUE_10883, 
BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -815,9 +699,6 @@ static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -827,16 +708,12 @@ static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -844,14 +721,11 @@ static const enum base_hw_issue base_hw_issues_model_t83x[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, GPUCORE_1619, BASE_HW_ISSUE_END }; @@ -862,7 +736,6 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -870,10 +743,6 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3964, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -883,7 +752,6 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, @@ -891,9 +759,6 @@ static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = { BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -903,16 +768,12 @@ static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = { BASE_HW_ISSUE_10883, BASE_HW_ISSUE_10946, BASE_HW_ISSUE_11051, - BASE_HW_ISSUE_11054, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, BASE_HW_ISSUE_T76X_3953, BASE_HW_ISSUE_T76X_3960, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, BASE_HW_ISSUE_END }; @@ -920,68 +781,15 @@ static const enum base_hw_issue base_hw_issues_model_t82x[] = { BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_11051, + BASE_HW_ISSUE_T76X_1909, BASE_HW_ISSUE_T76X_1963, BASE_HW_ISSUE_T76X_3086, BASE_HW_ISSUE_T76X_3700, BASE_HW_ISSUE_T76X_3793, - BASE_HW_ISSUE_T76X_3979, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, - GPUCORE_1619, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3953, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TMIX_8138, - BASE_HW_ISSUE_TMIX_8343, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue 
base_hw_issues_tMIx_r0p0[] = { - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_10682, - BASE_HW_ISSUE_10821, - BASE_HW_ISSUE_11054, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_7940, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TMIX_8138, - BASE_HW_ISSUE_TMIX_8206, - BASE_HW_ISSUE_TMIX_8343, - BASE_HW_ISSUE_END -}; - -static const enum base_hw_issue base_hw_issues_model_tMIx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, - BASE_HW_ISSUE_T76X_3700, - BASE_HW_ISSUE_T76X_3982, - BASE_HW_ISSUE_TMIX_7891, - BASE_HW_ISSUE_TMIX_7940, - BASE_HW_ISSUE_TMIX_8042, - BASE_HW_ISSUE_TMIX_8133, - BASE_HW_ISSUE_TMIX_8138, - BASE_HW_ISSUE_TMIX_8206, - BASE_HW_ISSUE_TMIX_8343, GPUCORE_1619, BASE_HW_ISSUE_END }; - - #endif /* _BASE_HWCONFIG_ISSUES_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h index 749dd9a1cc9ed..212100dbff634 100644 --- a/drivers/gpu/arm/midgard/mali_base_kernel.h +++ b/drivers/gpu/arm/midgard/mali_base_kernel.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -42,14 +42,7 @@ /* Support UK9 IOCTLS */ #define BASE_LEGACY_UK9_SUPPORT 1 -/* Support UK10_2 IOCTLS */ -#define BASE_LEGACY_UK10_2_SUPPORT 1 - -typedef struct base_mem_handle { - struct { - u64 handle; - } basep; -} base_mem_handle; +typedef u64 base_mem_handle; #include "mali_base_mem_priv.h" #include "mali_kbase_profiling_gator_api.h" @@ -69,10 +62,6 @@ typedef struct base_mem_handle { #define BASEP_JD_SEM_MASK_IN_WORD(x) (1 << ((x) & (BASEP_JD_SEM_PER_WORD - 1))) #define BASEP_JD_SEM_ARRAY_SIZE BASEP_JD_SEM_WORD_NR(BASE_JD_ATOM_COUNT) -/* Set/reset values for a software event */ -#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1) -#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0) - #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3 #define BASE_MAX_COHERENT_GROUPS 16 @@ -173,13 +162,8 @@ enum { /* IN */ BASE_MEM_COHERENT_SYSTEM_REQUIRED = (1U << 15), /**< Page coherence Outer shareable, required. */ - BASE_MEM_SECURE = (1U << 16), /**< Secure memory */ - BASE_MEM_DONT_NEED = (1U << 17), /**< Not needed physical - memory */ - BASE_MEM_IMPORT_SHARED = (1U << 18), /**< Must use shared CPU/GPU zone - (SAME_VA zone) but doesn't - require the addresses to - be the same */ + BASE_MEM_SECURE = (1U << 16) /**< Secure memory */ + }; /** @@ -187,7 +171,7 @@ enum { * * Must be kept in sync with the ::base_mem_alloc_flags flags */ -#define BASE_MEM_FLAGS_NR_BITS 19 +#define BASE_MEM_FLAGS_NR_BITS 17 /** * A mask for all output bits, excluding IN/OUT bits. @@ -200,13 +184,6 @@ enum { #define BASE_MEM_FLAGS_INPUT_MASK \ (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK) -/** - * A mask for all the flags which are modifiable via the base_mem_set_flags - * interface. - */ -#define BASE_MEM_FLAGS_MODIFIABLE \ - (BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \ - BASE_MEM_COHERENT_LOCAL) /** * enum base_mem_import_type - Memory types supported by @a base_mem_import @@ -247,32 +224,14 @@ struct base_mem_import_user_buffer { }; /** - * @brief Invalid memory handle. - * - * Return value from functions returning @ref base_mem_handle on error. 
- * - * @warning @ref base_mem_handle_new_invalid must be used instead of this macro - * in C++ code or other situations where compound literals cannot be used. + * @brief Invalid memory handle type. + * Return value from functions returning @a base_mem_handle on error. */ -#define BASE_MEM_INVALID_HANDLE ((base_mem_handle) { {BASEP_MEM_INVALID_HANDLE} }) - -/** - * @brief Special write-alloc memory handle. - * - * A special handle is used to represent a region where a special page is mapped - * with a write-alloc cache setup, typically used when the write result of the - * GPU isn't needed, but the GPU must write anyway. - * - * @warning @ref base_mem_handle_new_write_alloc must be used instead of this macro - * in C++ code or other situations where compound literals cannot be used. - */ -#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE ((base_mem_handle) { {BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE} }) - -#define BASEP_MEM_INVALID_HANDLE (0ull << 12) +#define BASE_MEM_INVALID_HANDLE (0ull << 12) #define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12) #define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12) #define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12) -#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) +#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12) /* reserved handles ..-64< for future special handles */ #define BASE_MEM_COOKIE_BASE (64ul << 12) #define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \ @@ -281,7 +240,6 @@ struct base_mem_import_user_buffer { /* Mask to detect 4GB boundary alignment */ #define BASE_MEM_MASK_4GB 0xfffff000UL - /* Bit mask of cookies used for for memory allocation setup */ #define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */ @@ -293,6 +251,7 @@ typedef enum base_backing_threshold_status { BASE_BACKING_THRESHOLD_OK = 0, /**< Resize successful */ BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE = -1, /**< Not a growable tmem object */ BASE_BACKING_THRESHOLD_ERROR_OOM = -2, /**< Increase failed due to an out-of-memory condition */ + BASE_BACKING_THRESHOLD_ERROR_MAPPED = -3, /**< Resize attempted on buffer while it was mapped, which is not permitted */ BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */ } base_backing_threshold_status; @@ -396,28 +355,6 @@ struct base_mem_aliasing_info { u64 length; }; -/** - * struct base_jit_alloc_info - Structure which describes a JIT allocation - * request. - * @gpu_alloc_addr: The GPU virtual address to write the JIT - * allocated GPU virtual address to. - * @va_pages: The minimum number of virtual pages required. - * @commit_pages: The minimum number of physical pages which - * should back the allocation. - * @extent: Granularity of physical pages to grow the - * allocation by during a fault. - * @id: Unique ID provided by the caller, this is used - * to pair allocation and free requests. - * Zero is not a valid value. - */ -struct base_jit_alloc_info { - u64 gpu_alloc_addr; - u64 va_pages; - u64 commit_pages; - u64 extent; - u8 id; -}; - /** * @brief Job dependency type. * @@ -444,20 +381,11 @@ typedef u8 base_jd_dep_type; * Special case is ::BASE_JD_REQ_DEP, which is used to express complex * dependencies, and that doesn't execute anything on the hardware. 
*/ -typedef u32 base_jd_core_req; +typedef u16 base_jd_core_req; /* Requirements that come from the HW */ - -/** - * No requirement, dependency only - */ -#define BASE_JD_REQ_DEP ((base_jd_core_req)0) - -/** - * Requires fragment shaders - */ -#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0) - +#define BASE_JD_REQ_DEP 0 /**< No requirement, dependency only */ +#define BASE_JD_REQ_FS (1U << 0) /**< Requires fragment shaders */ /** * Requires compute shaders * This covers any of the following Midgard Job types: @@ -469,38 +397,28 @@ typedef u32 base_jd_core_req; * job is specifically just the "Compute Shader" job type, and not the "Vertex * Shader" nor the "Geometry Shader" job type. */ -#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1) -#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2) /**< Requires tiling */ -#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3) /**< Requires cache flushes */ -#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4) /**< Requires value writeback */ +#define BASE_JD_REQ_CS (1U << 1) +#define BASE_JD_REQ_T (1U << 2) /**< Requires tiling */ +#define BASE_JD_REQ_CF (1U << 3) /**< Requires cache flushes */ +#define BASE_JD_REQ_V (1U << 4) /**< Requires value writeback */ /* SW-only requirements - the HW does not expose these as part of the job slot capabilities */ /* Requires fragment job with AFBC encoding */ -#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13) - -/** - * SW-only requirement: coalesce completion events. - * If this bit is set then completion of this atom will not cause an event to - * be sent to userspace, whether successful or not; completion events will be - * deferred until an atom completes which does not have this bit set. - * - * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES. - */ -#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5) +#define BASE_JD_REQ_FS_AFBC (1U << 13) /** * SW Only requirement: the job chain requires a coherent core group. We don't * mind which coherent core group is used. */ -#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6) +#define BASE_JD_REQ_COHERENT_GROUP (1U << 6) /** * SW Only requirement: The performance counters should be enabled only when * they are needed, to reduce power consumption. */ -#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7) +#define BASE_JD_REQ_PERMON (1U << 7) /** * SW Only requirement: External resources are referenced by this atom. @@ -508,16 +426,14 @@ typedef u32 base_jd_core_req; * but should instead be part of a NULL jobs inserted into the dependency tree. * The first pre_dep object must be configured for the external resouces to use, * the second pre_dep object can be used to create other dependencies. - * - * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE. */ -#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8) +#define BASE_JD_REQ_EXTERNAL_RESOURCES (1U << 8) /** * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted * to the hardware but will cause some action to happen within the driver */ -#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9) +#define BASE_JD_REQ_SOFT_JOB (1U << 9) #define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1) #define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2) @@ -560,66 +476,6 @@ typedef u32 base_jd_core_req; * - Priority is inherited from the replay job. */ #define BASE_JD_REQ_SOFT_REPLAY (BASE_JD_REQ_SOFT_JOB | 0x4) -/** - * SW only requirement: event wait/trigger job. 
- * - * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set. - * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the - * other waiting jobs. It completes immediately. - * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it - * possible for other jobs to wait upon. It completes immediately. - */ -#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5) -#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6) -#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7) - -#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8) - -/** - * SW only requirement: Just In Time allocation - * - * This job requests a JIT allocation based on the request in the - * @base_jit_alloc_info structure which is passed via the jc element of - * the atom. - * - * It should be noted that the id entry in @base_jit_alloc_info must not - * be reused until it has been released via @BASE_JD_REQ_SOFT_JIT_FREE. - * - * Should this soft job fail it is expected that a @BASE_JD_REQ_SOFT_JIT_FREE - * soft job to free the JIT allocation is still made. - * - * The job will complete immediately. - */ -#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9) -/** - * SW only requirement: Just In Time free - * - * This job requests a JIT allocation created by @BASE_JD_REQ_SOFT_JIT_ALLOC - * to be freed. The ID of the JIT allocation is passed via the jc element of - * the atom. - * - * The job will complete immediately. - */ -#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa) - -/** - * SW only requirement: Map external resource - * - * This job requests external resource(s) are mapped once the dependencies - * of the job have been satisfied. The list of external resources are - * passed via the jc element of the atom which is a pointer to a - * @base_external_resource_list. - */ -#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb) -/** - * SW only requirement: Unmap external resource - * - * This job requests external resource(s) are unmapped once the dependencies - * of the job has been satisfied. The list of external resources are - * passed via the jc element of the atom which is a pointer to a - * @base_external_resource_list. - */ -#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc) /** * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders) @@ -629,79 +485,54 @@ typedef u32 base_jd_core_req; * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs. */ -#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10) +#define BASE_JD_REQ_ONLY_COMPUTE (1U << 10) /** * HW Requirement: Use the base_jd_atom::device_nr field to specify a * particular core group * - * If both @ref BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority + * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority * - * This is only guaranteed to work for @ref BASE_JD_REQ_ONLY_COMPUTE atoms. + * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms. * * If the core availability policy is keeping the required core group turned off, then - * the job will fail with a @ref BASE_JD_EVENT_PM_EVENT error code. + * the job will fail with a BASE_JD_EVENT_PM_EVENT error code. 
*/ -#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11) +#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP (1U << 11) /** * SW Flag: If this bit is set then the successful completion of this atom * will not cause an event to be sent to userspace */ -#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12) +#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE (1U << 12) /** * SW Flag: If this bit is set then completion of this atom will not cause an * event to be sent to userspace, whether successful or not. */ -#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14) +#define BASEP_JD_REQ_EVENT_NEVER (1U << 14) /** - * SW Flag: Skip GPU cache clean and invalidation before starting a GPU job. - * - * If this bit is set then the GPU's cache will not be cleaned and invalidated - * until a GPU job starts which does not have this bit set or a job completes - * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use if - * the CPU may have written to memory addressed by the job since the last job - * without this bit set was submitted. - */ -#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15) +* These requirement bits are currently unused in base_jd_core_req (currently a u16) +*/ -/** - * SW Flag: Skip GPU cache clean and invalidation after a GPU job completes. - * - * If this bit is set then the GPU's cache will not be cleaned and invalidated - * until a GPU job completes which does not have this bit set or a job starts - * which does not have the @ref BASE_JD_REQ_SKIP_CACHE_START bti set. Do not use if - * the CPU may read from or partially overwrite memory addressed by the job - * before the next job without this bit set completes. - */ -#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16) +#define BASEP_JD_REQ_RESERVED_BIT5 (1U << 5) +#define BASEP_JD_REQ_RESERVED_BIT15 (1U << 15) /** - * These requirement bits are currently unused in base_jd_core_req - */ -#define BASEP_JD_REQ_RESERVED \ - (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \ - BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \ - BASE_JD_REQ_EVENT_COALESCE | \ - BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \ - BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \ - BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END)) +* Mask of all the currently unused requirement bits in base_jd_core_req. +*/ + +#define BASEP_JD_REQ_RESERVED (BASEP_JD_REQ_RESERVED_BIT5 | \ + BASEP_JD_REQ_RESERVED_BIT15) /** * Mask of all bits in base_jd_core_req that control the type of the atom. * * This allows dependency only atoms to have flags set */ -#define BASE_JD_REQ_ATOM_TYPE \ - (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \ - BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE) - -/** - * Mask of all bits in base_jd_core_req that control the type of a soft job. - */ -#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f) +#define BASEP_JD_REQ_ATOM_TYPE (~(BASEP_JD_REQ_RESERVED | BASE_JD_REQ_EVENT_ONLY_ON_FAILURE |\ + BASE_JD_REQ_EXTERNAL_RESOURCES | BASEP_JD_REQ_EVENT_NEVER)) /** * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which @@ -805,26 +636,18 @@ struct base_dependency { base_jd_dep_type dependency_type; /**< Dependency type */ }; -/* This structure has changed since UK 10.2 for which base_jd_core_req was a u16 value. 
- * In order to keep the size of the structure same, padding field has been adjusted - * accordingly and core_req field of a u32 type (to which UK 10.3 base_jd_core_req defines) - * is added at the end of the structure. Place in the structure previously occupied by u16 core_req - * is kept but renamed to compat_core_req and as such it can be used in ioctl call for job submission - * as long as UK 10.2 legacy is supported. Once when this support ends, this field can be left - * for possible future use. */ typedef struct base_jd_atom_v2 { u64 jc; /**< job-chain GPU address */ struct base_jd_udata udata; /**< user data */ kbase_pointer extres_list; /**< list of external resources */ u16 nr_extres; /**< nr of external resources */ - u16 compat_core_req; /**< core requirements which correspond to the legacy support for UK 10.2 */ + base_jd_core_req core_req; /**< core requirements */ struct base_dependency pre_dep[2]; /**< pre-dependencies, one need to use SETTER function to assign this field, this is done in order to reduce possibility of improper assigment of a dependency field */ base_atom_id atom_number; /**< unique number to identify the atom */ base_jd_prio prio; /**< Atom priority. Refer to @ref base_jd_prio for more details */ u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */ - u8 padding[1]; - base_jd_core_req core_req; /**< core requirements */ + u8 padding[5]; } base_jd_atom_v2; #ifdef BASE_LEGACY_UK6_SUPPORT @@ -833,14 +656,14 @@ struct base_jd_atom_v2_uk6 { struct base_jd_udata udata; /**< user data */ kbase_pointer extres_list; /**< list of external resources */ u16 nr_extres; /**< nr of external resources */ - u16 core_req; /**< core requirements */ + base_jd_core_req core_req; /**< core requirements */ base_atom_id pre_dep[2]; /**< pre-dependencies */ base_atom_id atom_number; /**< unique number to identify the atom */ base_jd_prio prio; /**< priority - smaller is higher priority */ u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */ u8 padding[7]; }; -#endif /* BASE_LEGACY_UK6_SUPPORT */ +#endif typedef enum base_external_resource_access { BASE_EXT_RES_ACCESS_SHARED, @@ -851,31 +674,6 @@ typedef struct base_external_resource { u64 ext_resource; } base_external_resource; - -/** - * The maximum number of external resources which can be mapped/unmapped - * in a single request. - */ -#define BASE_EXT_RES_COUNT_MAX 10 - -/** - * struct base_external_resource_list - Structure which describes a list of - * external resources. - * @count: The number of resources. - * @ext_res: Array of external resources which is - * sized at allocation time. - */ -struct base_external_resource_list { - u64 count; - struct base_external_resource ext_res[1]; -}; - -struct base_jd_debug_copy_buffer { - u64 address; - u64 size; - struct base_external_resource extres; -}; - /** * @brief Setter for a dependency structure * @@ -1652,7 +1450,7 @@ typedef struct mali_base_gpu_props { * Flags to pass to ::base_context_init. * Flags can be ORed together to enable multiple things. * - * These share the same space as BASEP_CONTEXT_FLAG_*, and so must + * These share the same space as @ref basep_context_private_flags, and so must * not collide with them. */ enum base_context_create_flags { @@ -1681,7 +1479,7 @@ enum base_context_create_flags { #define BASE_CONTEXT_CREATE_KERNEL_FLAGS \ ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) -/* +/** * Private flags used on the base context * * These start at bit 31, and run down to zero. 
@@ -1689,8 +1487,10 @@ enum base_context_create_flags { * They share the same space as @ref base_context_create_flags, and so must * not collide with them. */ -/** Private flag tracking whether job descriptor dumping is disabled */ -#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED ((u32)(1 << 31)) +enum basep_context_private_flags { + /** Private flag tracking whether job descriptor dumping is disabled */ + BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED = (1 << 31) +}; /** @} end group base_user_api_core */ @@ -1763,21 +1563,9 @@ typedef struct base_jd_replay_payload { * Core requirements for the fragment job chain */ base_jd_core_req fragment_core_req; -} base_jd_replay_payload; -#ifdef BASE_LEGACY_UK10_2_SUPPORT -typedef struct base_jd_replay_payload_uk10_2 { - u64 tiler_jc_list; - u64 fragment_jc; - u64 tiler_heap_free; - u16 fragment_hierarchy_mask; - u16 tiler_hierarchy_mask; - u32 hierarchy_default_weight; - u16 tiler_core_req; - u16 fragment_core_req; u8 padding[4]; -} base_jd_replay_payload_uk10_2; -#endif /* BASE_LEGACY_UK10_2_SUPPORT */ +} base_jd_replay_payload; /** * @brief An entry in the linked list of job chains to be replayed. This must diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h index b6d28fea99873..fe58341fdef87 100644 --- a/drivers/gpu/arm/midgard/mali_kbase.h +++ b/drivers/gpu/arm/midgard/mali_kbase.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,7 +45,6 @@ #include #include -#include "mali_kbase_strings.h" #include "mali_kbase_pm.h" #include "mali_kbase_mem_lowlevel.h" #include "mali_kbase_defs.h" @@ -140,6 +139,7 @@ void kbase_jd_done_worker(struct work_struct *data); void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp, kbasep_js_atom_done_code done_code); void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom); +void kbase_jd_evict(struct kbase_device *kbdev, struct kbase_jd_atom *katom); void kbase_jd_zap_context(struct kbase_context *kctx); bool jd_done_nolock(struct kbase_jd_atom *katom, struct list_head *completed_jobs_ctx); @@ -147,7 +147,6 @@ void kbase_jd_free_external_resources(struct kbase_jd_atom *katom); bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom); -void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom); void kbase_job_done(struct kbase_device *kbdev, u32 done); @@ -175,7 +174,7 @@ void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js, void kbase_job_slot_hardstop(struct kbase_context *kctx, int js, struct kbase_jd_atom *target_katom); void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action, - base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom); + u16 core_reqs, struct kbase_jd_atom *target_katom); void kbase_job_check_leave_disjoint(struct kbase_device *kbdev, struct kbase_jd_atom *target_katom); @@ -192,17 +191,9 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom); void kbase_finish_soft_job(struct kbase_jd_atom *katom); void kbase_cancel_soft_job(struct kbase_jd_atom *katom); void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev); -void kbasep_add_waiting_soft_job(struct kbase_jd_atom *katom); -void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom); -int 
kbase_soft_event_update(struct kbase_context *kctx, - u64 event, - unsigned char new_status); bool kbase_replay_process(struct kbase_jd_atom *katom); -void kbasep_soft_job_timeout_worker(unsigned long data); -void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt); - /* api used internally for register access. Contains validation and tracing */ void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value); int kbase_device_trace_buffer_install( @@ -213,6 +204,7 @@ void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx); void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value); u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset); + void kbasep_as_do_poke(struct work_struct *work); /** Returns the name associated with a Mali exception code diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c index c67b3e97f1af7..2fb5e3edf49f6 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c +++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,11 +43,6 @@ u32 kbase_cache_enabled(u32 flags, u32 nr_pages) void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { -/* Check if kernel is using coherency with GPU */ -#ifdef CONFIG_MALI_COH_KERN - if (kbdev->system_coherency == COHERENCY_ACE) - return; -#endif /* CONFIG_MALI_COH_KERN */ dma_sync_single_for_device(kbdev->dev, handle, size, dir); } @@ -55,10 +50,5 @@ void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { -/* Check if kernel is using coherency with GPU */ -#ifdef CONFIG_MALI_COH_KERN - if (kbdev->system_coherency == COHERENCY_ACE) - return; -#endif /* CONFIG_MALI_COH_KERN */ dma_sync_single_for_cpu(kbdev->dev, handle, size, dir); } diff --git a/drivers/gpu/arm/midgard/mali_kbase_config.h b/drivers/gpu/arm/midgard/mali_kbase_config.h index 356d52bcd7746..816e45c4d02d5 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_config.h +++ b/drivers/gpu/arm/midgard/mali_kbase_config.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,6 +45,13 @@ * @{ */ +#if !MALI_CUSTOMER_RELEASE +/* This flag is set for internal builds so we can run tests without credentials. */ +#define KBASE_HWCNT_DUMP_BYPASS_ROOT 1 +#else +#define KBASE_HWCNT_DUMP_BYPASS_ROOT 0 +#endif + #include /* Forward declaration of struct kbase_device */ @@ -98,7 +105,7 @@ struct kbase_pm_callback_conf { * The system integrator can decide whether to either do nothing, just switch off * the clocks to the GPU, or to completely power down the GPU. * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. 
It is the - * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). + * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). */ void (*power_off_callback)(struct kbase_device *kbdev); @@ -108,7 +115,7 @@ struct kbase_pm_callback_conf { * succeed. The return value specifies whether the GPU was powered down since the call to power_off_callback. * If the GPU state has been lost then this function must return 1, otherwise it should return 0. * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the - * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). + * platform \em callbacks responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf). * * The return value of the first call to this function is ignored. * @@ -153,7 +160,7 @@ struct kbase_pm_callback_conf { * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback. * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature. * - * @return 0 on success, else int error code. + * @return 0 on success, else int error code. */ int (*power_runtime_init_callback)(struct kbase_device *kbdev); @@ -196,8 +203,8 @@ struct kbase_pm_callback_conf { * Returning 0 will cause the runtime PM core to conduct a regular * autosuspend. * - * This callback is optional and if not provided regular autosuspend - * will be triggered. + * This callback is optional and if not provided regular autosuspend + * will be triggered. * * Note: The Linux kernel must have CONFIG_PM_RUNTIME enabled to use * this feature. diff --git a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h index 9b00cce9b2b33..bd48ed96e962f 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h +++ b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -148,6 +148,11 @@ enum { */ #define DEFAULT_AWID_LIMIT KBASE_AID_32 +/** + * Default setting for using alternative hardware counters. + */ +#define DEFAULT_ALTERNATIVE_HWC false + /** * Default UMP device mapping. A UMP_DEVICE__SHIFT value which * defines which UMP device this GPU should be mapped to. @@ -215,12 +220,6 @@ enum { */ #define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */ -/* - * Default timeout for some software jobs, after which the software event wait - * jobs will be cancelled. - */ -#define DEFAULT_JS_SOFT_JOB_TIMEOUT ((u32)3000) /* 3s */ - /* * Default minimum number of scheduling ticks before the GPU is reset to clear a * "stuck" job diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c index 344a1f16de8a0..7989799639375 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_context.c +++ b/drivers/gpu/arm/midgard/mali_kbase_context.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,7 +23,8 @@ #include #include -#include +#include + /** * kbase_create_context() - Create a kernel base context. @@ -64,8 +65,6 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) kctx->process_mm = NULL; atomic_set(&kctx->nonmapped_pages, 0); kctx->slots_pullable = 0; - kctx->tgid = current->tgid; - kctx->pid = current->pid; err = kbase_mem_pool_init(&kctx->mem_pool, kbdev->mem_pool_max_size_default, @@ -73,15 +72,11 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) if (err) goto free_kctx; - err = kbase_mem_evictable_init(kctx); - if (err) - goto free_pool; - atomic_set(&kctx->used_pages, 0); err = kbase_jd_init(kctx); if (err) - goto deinit_evictable; + goto free_pool; err = kbasep_js_kctx_init(kctx); if (err) @@ -91,22 +86,16 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) if (err) goto free_jd; - atomic_set(&kctx->drain_pending, 0); - mutex_init(&kctx->reg_lock); INIT_LIST_HEAD(&kctx->waiting_soft_jobs); - spin_lock_init(&kctx->waiting_soft_jobs_lock); #ifdef CONFIG_KDS INIT_LIST_HEAD(&kctx->waiting_kds_resource); #endif - err = kbase_dma_fence_init(kctx); - if (err) - goto free_event; err = kbase_mmu_init(kctx); if (err) - goto term_dma_fence; + goto free_event; kctx->pgd = kbase_mmu_alloc_pgd(kctx); if (!kctx->pgd) @@ -116,6 +105,8 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) if (!kctx->aliasing_sink_page) goto no_sink_page; + kctx->tgid = current->tgid; + kctx->pid = current->pid; init_waitqueue_head(&kctx->event_queue); kctx->cookies = KBASE_COOKIE_MASK; @@ -124,14 +115,6 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) err = kbase_region_tracker_init(kctx); if (err) goto no_region_tracker; - - err = kbase_sticky_resource_init(kctx); - if (err) - goto no_sticky; - - err = kbase_jit_init(kctx); - if (err) - goto no_jit; #ifdef CONFIG_GPU_TRACEPOINTS atomic_set(&kctx->jctx.work_id, 0); #endif @@ -143,18 +126,8 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) mutex_init(&kctx->vinstr_cli_lock); - setup_timer(&kctx->soft_job_timeout, - kbasep_soft_job_timeout_worker, - (uintptr_t)kctx); - return kctx; -no_jit: - kbase_gpu_vm_lock(kctx); - kbase_sticky_resource_term(kctx); - kbase_gpu_vm_unlock(kctx); -no_sticky: - kbase_region_tracker_term(kctx); no_region_tracker: kbase_mem_pool_free(&kctx->mem_pool, kctx->aliasing_sink_page, false); no_sink_page: @@ -164,16 +137,12 @@ kbase_create_context(struct kbase_device *kbdev, bool is_compat) kbase_gpu_vm_unlock(kctx); free_mmu: kbase_mmu_term(kctx); -term_dma_fence: - kbase_dma_fence_term(kctx); free_event: kbase_event_cleanup(kctx); free_jd: /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ kbasep_js_kctx_term(kctx); kbase_jd_exit(kctx); -deinit_evictable: - kbase_mem_evictable_deinit(kctx); free_pool: kbase_mem_pool_term(&kctx->mem_pool); free_kctx: @@ -219,18 +188,8 @@ void kbase_destroy_context(struct kbase_context *kctx) kbase_jd_zap_context(kctx); kbase_event_cleanup(kctx); - /* - * JIT must be terminated before the code below as it must be called - * without the region lock being held. - * The code above ensures no new JIT allocations can be made by - * by the time we get to this point of context tear down. 
- */ - kbase_jit_term(kctx); - kbase_gpu_vm_lock(kctx); - kbase_sticky_resource_term(kctx); - /* MMU is disabled as part of scheduling out the context */ kbase_mmu_free_pgd(kctx); @@ -260,15 +219,12 @@ void kbase_destroy_context(struct kbase_context *kctx) kbase_pm_context_idle(kbdev); - kbase_dma_fence_term(kctx); - kbase_mmu_term(kctx); pages = atomic_read(&kctx->used_pages); if (pages != 0) dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); - kbase_mem_evictable_deinit(kctx); kbase_mem_pool_term(&kctx->mem_pool); WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0); diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c index ee59504cd4e87..ab6f5e45eacce 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c +++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c @@ -1,6 +1,7 @@ + /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -17,10 +18,13 @@ #include "platform/rk/custom_log.h" #include +#include #include #include #include +#include #include +#include #include #ifdef CONFIG_MALI_DEVFREQ #include @@ -59,10 +63,12 @@ #include #include #include +#include #include #include /* is_compat_task */ #include #include +#include #ifdef CONFIG_MALI_PLATFORM_DEVICETREE #include #endif /* CONFIG_MALI_PLATFORM_DEVICETREE */ @@ -82,6 +88,9 @@ #include +#ifdef CONFIG_MACH_MANTA +#include +#endif #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) #include @@ -89,9 +98,9 @@ #include #endif +#if defined(CONFIG_MALI_MIPE_ENABLED) #include - -#include +#endif /* GPU IRQ Tags */ #define JOB_IRQ_TAG 0 @@ -103,9 +112,12 @@ static struct kbase_exported_test_data shared_kernel_test_data; EXPORT_SYMBOL(shared_kernel_test_data); #endif /* MALI_UNIT_TEST */ +#define KBASE_DRV_NAME "mali" /** rk_ext : version of rk_ext on mali_ko, aka. rk_ko_ver. */ #define ROCKCHIP_VERSION (13) +static const char kbase_drv_name[] = KBASE_DRV_NAME; + static int kbase_dev_nr; static DEFINE_MUTEX(kbase_dev_list_lock); @@ -148,8 +160,204 @@ static int kds_resource_release(struct inode *inode, struct file *file) } return 0; } + +static int kbasep_kds_allocate_resource_list_data(struct kbase_context *kctx, struct base_external_resource *ext_res, int num_elems, struct kbase_kds_resource_list_data *resources_list) +{ + struct base_external_resource *res = ext_res; + int res_id; + + /* assume we have to wait for all */ + + KBASE_DEBUG_ASSERT(0 != num_elems); + resources_list->kds_resources = kmalloc_array(num_elems, + sizeof(struct kds_resource *), GFP_KERNEL); + + if (NULL == resources_list->kds_resources) + return -ENOMEM; + + KBASE_DEBUG_ASSERT(0 != num_elems); + resources_list->kds_access_bitmap = kzalloc( + sizeof(unsigned long) * + ((num_elems + BITS_PER_LONG - 1) / BITS_PER_LONG), + GFP_KERNEL); + + if (NULL == resources_list->kds_access_bitmap) { + kfree(resources_list->kds_access_bitmap); + return -ENOMEM; + } + + kbase_gpu_vm_lock(kctx); + for (res_id = 0; res_id < num_elems; res_id++, res++) { + int exclusive; + struct kbase_va_region *reg; + struct kds_resource *kds_res = NULL; + + exclusive = res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE; + reg = kbase_region_tracker_find_region_enclosing_address(kctx, res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); + + /* did we find a matching region object? 
*/ + if (NULL == reg || (reg->flags & KBASE_REG_FREE)) + break; + + /* no need to check reg->alloc as only regions with an alloc has + * a size, and kbase_region_tracker_find_region_enclosing_address + * only returns regions with size > 0 */ + switch (reg->gpu_alloc->type) { +#if defined(CONFIG_UMP) && defined(CONFIG_KDS) + case KBASE_MEM_TYPE_IMPORTED_UMP: + kds_res = ump_dd_kds_resource_get(reg->gpu_alloc->imported.ump_handle); + break; +#endif /* defined(CONFIG_UMP) && defined(CONFIG_KDS) */ + default: + break; + } + + /* no kds resource for the region ? */ + if (!kds_res) + break; + + resources_list->kds_resources[res_id] = kds_res; + + if (exclusive) + set_bit(res_id, resources_list->kds_access_bitmap); + } + kbase_gpu_vm_unlock(kctx); + + /* did the loop run to completion? */ + if (res_id == num_elems) + return 0; + + /* Clean up as the resource list is not valid. */ + kfree(resources_list->kds_resources); + kfree(resources_list->kds_access_bitmap); + + return -EINVAL; +} + +static bool kbasep_validate_kbase_pointer( + struct kbase_context *kctx, union kbase_pointer *p) +{ + if (kctx->is_compat) { + if (p->compat_value == 0) + return false; + } else { + if (NULL == p->value) + return false; + } + return true; +} + +static int kbase_external_buffer_lock(struct kbase_context *kctx, + struct kbase_uk_ext_buff_kds_data *args, u32 args_size) +{ + struct base_external_resource *ext_res_copy; + size_t ext_resource_size; + int ret = -EINVAL; + int fd = -EBADF; + struct base_external_resource __user *ext_res_user; + int __user *file_desc_usr; + struct kbasep_kds_resource_set_file_data *fdata; + struct kbase_kds_resource_list_data resource_list_data; + + if (args_size != sizeof(struct kbase_uk_ext_buff_kds_data)) + return -EINVAL; + + /* Check user space has provided valid data */ + if (!kbasep_validate_kbase_pointer(kctx, &args->external_resource) || + !kbasep_validate_kbase_pointer(kctx, &args->file_descriptor) || + (0 == args->num_res) || + (args->num_res > KBASE_MAXIMUM_EXT_RESOURCES)) + return -EINVAL; + + ext_resource_size = sizeof(struct base_external_resource) * args->num_res; + + KBASE_DEBUG_ASSERT(0 != ext_resource_size); + ext_res_copy = kmalloc(ext_resource_size, GFP_KERNEL); + + if (!ext_res_copy) + return -EINVAL; +#ifdef CONFIG_COMPAT + if (kctx->is_compat) { + ext_res_user = compat_ptr(args->external_resource.compat_value); + file_desc_usr = compat_ptr(args->file_descriptor.compat_value); + } else { +#endif /* CONFIG_COMPAT */ + ext_res_user = args->external_resource.value; + file_desc_usr = args->file_descriptor.value; +#ifdef CONFIG_COMPAT + } +#endif /* CONFIG_COMPAT */ + + /* Copy the external resources to lock from user space */ + if (copy_from_user(ext_res_copy, ext_res_user, ext_resource_size)) + goto out; + + /* Allocate data to be stored in the file */ + fdata = kmalloc(sizeof(*fdata), GFP_KERNEL); + + if (!fdata) { + ret = -ENOMEM; + goto out; + } + + /* Parse given elements and create resource and access lists */ + ret = kbasep_kds_allocate_resource_list_data(kctx, + ext_res_copy, args->num_res, &resource_list_data); + if (!ret) { + long err; + + fdata->lock = NULL; + + fd = anon_inode_getfd("kds_ext", &kds_resource_fops, fdata, 0); + + err = copy_to_user(file_desc_usr, &fd, sizeof(fd)); + + /* If the file descriptor was valid and we successfully copied + * it to user space, then we can try and lock the requested + * kds resources. 
+ */ + if ((fd >= 0) && (0 == err)) { + struct kds_resource_set *lock; + + lock = kds_waitall(args->num_res, + resource_list_data.kds_access_bitmap, + resource_list_data.kds_resources, + KDS_WAIT_BLOCKING); + + if (!lock) { + ret = -EINVAL; + } else if (IS_ERR(lock)) { + ret = PTR_ERR(lock); + } else { + ret = 0; + fdata->lock = lock; + } + } else { + ret = -EINVAL; + } + + kfree(resource_list_data.kds_resources); + kfree(resource_list_data.kds_access_bitmap); + } + + if (ret) { + /* If the file was opened successfully then close it which will + * clean up the file data, otherwise we clean up the file data + * ourself. + */ + if (fd >= 0) + sys_close(fd); + else + kfree(fdata); + } +out: + kfree(ext_res_copy); + + return ret; +} #endif /* CONFIG_KDS */ +#ifdef CONFIG_MALI_MIPE_ENABLED static void kbase_create_timeline_objects(struct kbase_context *kctx) { struct kbase_device *kbdev = kctx->kbdev; @@ -189,8 +397,7 @@ static void kbase_create_timeline_objects(struct kbase_context *kctx) list_for_each_entry(element, &kbdev->kctx_list, link) { kbase_tlstream_tl_summary_new_ctx( element->kctx, - (u32)(element->kctx->id), - (u32)(element->kctx->tgid)); + (u32)(element->kctx->id)); } /* Before releasing the lock, reset body stream buffers. * This will prevent context creation message to be directed to both @@ -202,6 +409,7 @@ static void kbase_create_timeline_objects(struct kbase_context *kctx) * user space. */ kbase_tlstream_flush_streams(); } +#endif static void kbase_api_handshake(struct uku_version_check_args *version) { @@ -268,34 +476,6 @@ enum mali_error { MALI_ERROR_FUNCTION_FAILED, }; -enum { - inited_mem = (1u << 0), - inited_js = (1u << 1), - inited_pm_runtime_init = (1u << 2), -#ifdef CONFIG_MALI_DEVFREQ - inited_devfreq = (1u << 3), -#endif /* CONFIG_MALI_DEVFREQ */ - inited_tlstream = (1u << 4), - inited_backend_early = (1u << 5), - inited_backend_late = (1u << 6), - inited_device = (1u << 7), - inited_vinstr = (1u << 8), -#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY - inited_ipa = (1u << 9), -#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */ - inited_job_fault = (1u << 10), - inited_misc_register = (1u << 11), - inited_get_device = (1u << 12), - inited_sysfs_group = (1u << 13), - inited_dev_list = (1u << 14), - inited_debugfs = (1u << 15), - inited_gpu_device = (1u << 16), - inited_registers_map = (1u << 17), - inited_power_control = (1u << 19), - inited_buslogger = (1u << 20) -}; - - #ifdef CONFIG_MALI_DEBUG #define INACTIVE_WAIT_MS (5000) @@ -381,18 +561,6 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg /* setup complete, perform normal operation */ switch (id) { - case KBASE_FUNC_MEM_JIT_INIT: - { - struct kbase_uk_mem_jit_init *jit_init = args; - - if (sizeof(*jit_init) != args_size) - goto bad_size; - - if (kbase_region_tracker_init_jit(kctx, - jit_init->va_pages)) - ukh->ret = MALI_ERROR_FUNCTION_FAILED; - break; - } case KBASE_FUNC_MEM_ALLOC: { struct kbase_uk_mem_alloc *mem = args; @@ -401,13 +569,6 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg if (sizeof(*mem) != args_size) goto bad_size; -#if defined(CONFIG_64BIT) - if (!kctx->is_compat) { - /* force SAME_VA if a 64-bit client */ - mem->flags |= BASE_MEM_SAME_VA; - } -#endif - reg = kbase_mem_alloc(kctx, mem->va_pages, mem->commit_pages, mem->extent, &mem->flags, &mem->gpu_va, @@ -434,13 +595,10 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg break; } - if (kbase_mem_import(kctx, - (enum base_mem_import_type) - 
mem_import->type, - phandle, - &mem_import->gpu_va, - &mem_import->va_pages, - &mem_import->flags)) { + if (kbase_mem_import(kctx, mem_import->type, phandle, + &mem_import->gpu_va, + &mem_import->va_pages, + &mem_import->flags)) { mem_import->type = BASE_MEM_IMPORT_TYPE_INVALID; ukh->ret = MALI_ERROR_FUNCTION_FAILED; } @@ -621,7 +779,7 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg if (sizeof(*sn) != args_size) goto bad_size; - if (sn->sset.basep_sset.mem_handle.basep.handle & ~PAGE_MASK) { + if (sn->sset.basep_sset.mem_handle & ~PAGE_MASK) { dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_SYNC: sn->sset.basep_sset.mem_handle: passed parameter is invalid"); ukh->ret = MALI_ERROR_FUNCTION_FAILED; break; @@ -796,6 +954,26 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg break; } + case KBASE_FUNC_EXT_BUFFER_LOCK: + { +#ifdef CONFIG_KDS + ret = kbase_external_buffer_lock(kctx, + (struct kbase_uk_ext_buff_kds_data *)args, + args_size); + switch (ret) { + case 0: + ukh->ret = MALI_ERROR_NONE; + break; + case -ENOMEM: + ukh->ret = MALI_ERROR_OUT_OF_MEMORY; + break; + default: + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + } +#endif /* CONFIG_KDS */ + break; + } + case KBASE_FUNC_SET_TEST_DATA: { #if MALI_UNIT_TEST @@ -897,7 +1075,7 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg goto bad_size; if (add_data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) { - dev_err(kbdev->dev, "buffer too big\n"); + dev_err(kbdev->dev, "buffer too big"); goto out_bad; } @@ -941,6 +1119,7 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg } #endif /* CONFIG_MALI_NO_MALI */ +#ifdef CONFIG_MALI_MIPE_ENABLED case KBASE_FUNC_TLSTREAM_ACQUIRE: { struct kbase_uk_tlstream_acquire *tlstream_acquire = @@ -1000,6 +1179,7 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg break; } #endif /* MALI_UNIT_TEST */ +#endif /* CONFIG_MALI_MIPE_ENABLED */ case KBASE_FUNC_GET_CONTEXT_ID: { @@ -1009,27 +1189,8 @@ static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 arg break; } - case KBASE_FUNC_SOFT_EVENT_UPDATE: - { - struct kbase_uk_soft_event_update *update = args; - - if (sizeof(*update) != args_size) - goto bad_size; - - if (((update->new_status != BASE_JD_SOFT_EVENT_SET) && - (update->new_status != BASE_JD_SOFT_EVENT_RESET)) || - (update->flags != 0)) - goto out_bad; - - if (kbase_soft_event_update(kctx, update->evt, - update->new_status)) - ukh->ret = MALI_ERROR_FUNCTION_FAILED; - - break; - } - default: - dev_err(kbdev->dev, "unknown ioctl %u\n", id); + dev_err(kbdev->dev, "unknown ioctl %u", id); goto out_bad; } @@ -1046,47 +1207,6 @@ static struct kbase_device *to_kbase_device(struct device *dev) return dev_get_drvdata(dev); } -static int assign_irqs(struct platform_device *pdev) -{ - struct kbase_device *kbdev = to_kbase_device(&pdev->dev); - int i; - - if (!kbdev) - return -ENODEV; - - /* 3 IRQ resources */ - for (i = 0; i < 3; i++) { - struct resource *irq_res; - int irqtag; - - irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i); - if (!irq_res) { - dev_err(kbdev->dev, "No IRQ resource at index %d\n", i); - return -ENOENT; - } - -#ifdef CONFIG_OF - if (!strcmp(irq_res->name, "JOB")) { - irqtag = JOB_IRQ_TAG; - } else if (!strcmp(irq_res->name, "MMU")) { - irqtag = MMU_IRQ_TAG; - } else if (!strcmp(irq_res->name, "GPU")) { - irqtag = GPU_IRQ_TAG; - } else { - dev_err(&pdev->dev, "Invalid irq res name: '%s'\n", - irq_res->name); - 
return -EINVAL; - } -#else - irqtag = i; -#endif /* CONFIG_OF */ - kbdev->irqs[irqtag].irq = irq_res->start; - kbdev->irqs[irqtag].flags = irq_res->flags & IRQF_TRIGGER_MASK; - } - - return 0; -} - /* * API to acquire device list mutex and * return pointer to the device list head @@ -1189,8 +1309,7 @@ static int kbase_open(struct inode *inode, struct file *filp) kbase_mem_pool_debugfs_add(kctx->kctx_dentry, &kctx->mem_pool); - kbase_jit_debugfs_add(kctx); -#endif /* CONFIG_DEBUG_FS */ +#endif /* CONFIG_DEBUGFS */ dev_dbg(kbdev->dev, "created base context\n"); @@ -1202,10 +1321,11 @@ static int kbase_open(struct inode *inode, struct file *filp) mutex_lock(&kbdev->kctx_list_lock); element->kctx = kctx; list_add(&element->link, &kbdev->kctx_list); +#ifdef CONFIG_MALI_MIPE_ENABLED kbase_tlstream_tl_new_ctx( element->kctx, - (u32)(element->kctx->id), - (u32)(element->kctx->tgid)); + (u32)(element->kctx->id)); +#endif mutex_unlock(&kbdev->kctx_list_lock); } else { /* we don't treat this as a fail - just warn about it */ @@ -1226,7 +1346,9 @@ static int kbase_release(struct inode *inode, struct file *filp) struct kbasep_kctx_list_element *element, *tmp; bool found_element = false; +#ifdef CONFIG_MALI_MIPE_ENABLED kbase_tlstream_tl_del_ctx(kctx); +#endif #ifdef CONFIG_DEBUG_FS debugfs_remove_recursive(kctx->kctx_dentry); @@ -1491,12 +1613,11 @@ static unsigned long kbase_get_unmapped_area(struct file *filp, flags); if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) { - info.high_limit = kctx->same_va_end << PAGE_SHIFT; + info.high_limit = 1ul << 33; info.align_mask = 0; info.align_offset = 0; } else { - info.high_limit = min_t(unsigned long, mm->mmap_base, - (kctx->same_va_end << PAGE_SHIFT)); + info.high_limit = mm->mmap_base; if (len >= SZ_2M) { info.align_offset = SZ_2M; info.align_mask = SZ_2M - 1; @@ -1540,6 +1661,7 @@ u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset) } #endif /* !CONFIG_MALI_NO_MALI */ + /** Show callback for the @c power_policy sysfs file. * * This function is called to get the contents of the @c power_policy sysfs @@ -1862,89 +1984,6 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, */ static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask); -/** - * set_soft_job_timeout() - Store callback for the soft_job_timeout sysfs - * file. - * - * @dev: The device this sysfs file is for. - * @attr: The attributes of the sysfs file. - * @buf: The value written to the sysfs file. - * @count: The number of bytes written to the sysfs file. - * - * This allows setting the timeout for software jobs. Waiting soft event wait - * jobs will be cancelled after this period expires, while soft fence wait jobs - * will print debug information if the fence debug feature is enabled. - * - * This is expressed in milliseconds. - * - * Return: count if the function succeeded. An error code on failure. - */ -static ssize_t set_soft_job_timeout(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct kbase_device *kbdev; - int soft_job_timeout_ms; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - if ((kstrtoint(buf, 0, &soft_job_timeout_ms) != 0) || - (soft_job_timeout_ms <= 0)) - return -EINVAL; - - atomic_set(&kbdev->js_data.soft_job_timeout_ms, - soft_job_timeout_ms); - - return count; -} - -/** - * show_soft_job_timeout() - Show callback for the soft_job_timeout sysfs - * file. - * - * This will return the timeout for the software jobs. 
- * - * @dev: The device this sysfs file is for. - * @attr: The attributes of the sysfs file. - * @buf: The output buffer for the sysfs file contents. - * - * Return: The number of bytes output to buf. - */ -static ssize_t show_soft_job_timeout(struct device *dev, - struct device_attribute *attr, - char * const buf) -{ - struct kbase_device *kbdev; - - kbdev = to_kbase_device(dev); - if (!kbdev) - return -ENODEV; - - return scnprintf(buf, PAGE_SIZE, "%i\n", - atomic_read(&kbdev->js_data.soft_job_timeout_ms)); -} - -static DEVICE_ATTR(soft_job_timeout, S_IRUGO | S_IWUSR, - show_soft_job_timeout, set_soft_job_timeout); - -static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms, - int default_ticks, u32 old_ticks) -{ - if (timeout_ms > 0) { - u64 ticks = timeout_ms * 1000000ULL; - do_div(ticks, kbdev->js_data.scheduling_period_ns); - if (!ticks) - return 1; - return ticks; - } else if (timeout_ms < 0) { - return default_ticks; - } else { - return old_ticks; - } -} - /** Store callback for the @c js_timeouts sysfs file. * * This function is called to get the contents of the @c js_timeouts sysfs @@ -1989,45 +2028,99 @@ static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr &js_reset_ms_cl, &js_reset_ms_dumping); if (items == 8) { - struct kbasep_js_device_data *js_data = &kbdev->js_data; - unsigned long flags; - - spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); - -#define UPDATE_TIMEOUT(ticks_name, ms_name, default) do {\ - js_data->ticks_name = timeout_ms_to_ticks(kbdev, ms_name, \ - default, js_data->ticks_name); \ - dev_dbg(kbdev->dev, "Overriding " #ticks_name \ - " with %lu ticks (%lu ms)\n", \ - (unsigned long)js_data->ticks_name, \ - ms_name); \ - } while (0) - - UPDATE_TIMEOUT(soft_stop_ticks, js_soft_stop_ms, - DEFAULT_JS_SOFT_STOP_TICKS); - UPDATE_TIMEOUT(soft_stop_ticks_cl, js_soft_stop_ms_cl, - DEFAULT_JS_SOFT_STOP_TICKS_CL); - UPDATE_TIMEOUT(hard_stop_ticks_ss, js_hard_stop_ms_ss, - kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ? - DEFAULT_JS_HARD_STOP_TICKS_SS_8408 : - DEFAULT_JS_HARD_STOP_TICKS_SS); - UPDATE_TIMEOUT(hard_stop_ticks_cl, js_hard_stop_ms_cl, - DEFAULT_JS_HARD_STOP_TICKS_CL); - UPDATE_TIMEOUT(hard_stop_ticks_dumping, - js_hard_stop_ms_dumping, - DEFAULT_JS_HARD_STOP_TICKS_DUMPING); - UPDATE_TIMEOUT(gpu_reset_ticks_ss, js_reset_ms_ss, - kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408) ? 
- DEFAULT_JS_RESET_TICKS_SS_8408 : - DEFAULT_JS_RESET_TICKS_SS); - UPDATE_TIMEOUT(gpu_reset_ticks_cl, js_reset_ms_cl, - DEFAULT_JS_RESET_TICKS_CL); - UPDATE_TIMEOUT(gpu_reset_ticks_dumping, js_reset_ms_dumping, - DEFAULT_JS_RESET_TICKS_DUMPING); - - kbase_js_set_timeouts(kbdev); - - spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); + u64 ticks; + + if (js_soft_stop_ms >= 0) { + ticks = js_soft_stop_ms * 1000000ULL; + do_div(ticks, kbdev->js_data.scheduling_period_ns); + kbdev->js_soft_stop_ticks = ticks; + } else { + kbdev->js_soft_stop_ticks = -1; + } + + if (js_soft_stop_ms_cl >= 0) { + ticks = js_soft_stop_ms_cl * 1000000ULL; + do_div(ticks, kbdev->js_data.scheduling_period_ns); + kbdev->js_soft_stop_ticks_cl = ticks; + } else { + kbdev->js_soft_stop_ticks_cl = -1; + } + + if (js_hard_stop_ms_ss >= 0) { + ticks = js_hard_stop_ms_ss * 1000000ULL; + do_div(ticks, kbdev->js_data.scheduling_period_ns); + kbdev->js_hard_stop_ticks_ss = ticks; + } else { + kbdev->js_hard_stop_ticks_ss = -1; + } + + if (js_hard_stop_ms_cl >= 0) { + ticks = js_hard_stop_ms_cl * 1000000ULL; + do_div(ticks, kbdev->js_data.scheduling_period_ns); + kbdev->js_hard_stop_ticks_cl = ticks; + } else { + kbdev->js_hard_stop_ticks_cl = -1; + } + + if (js_hard_stop_ms_dumping >= 0) { + ticks = js_hard_stop_ms_dumping * 1000000ULL; + do_div(ticks, kbdev->js_data.scheduling_period_ns); + kbdev->js_hard_stop_ticks_dumping = ticks; + } else { + kbdev->js_hard_stop_ticks_dumping = -1; + } + + if (js_reset_ms_ss >= 0) { + ticks = js_reset_ms_ss * 1000000ULL; + do_div(ticks, kbdev->js_data.scheduling_period_ns); + kbdev->js_reset_ticks_ss = ticks; + } else { + kbdev->js_reset_ticks_ss = -1; + } + + if (js_reset_ms_cl >= 0) { + ticks = js_reset_ms_cl * 1000000ULL; + do_div(ticks, kbdev->js_data.scheduling_period_ns); + kbdev->js_reset_ticks_cl = ticks; + } else { + kbdev->js_reset_ticks_cl = -1; + } + + if (js_reset_ms_dumping >= 0) { + ticks = js_reset_ms_dumping * 1000000ULL; + do_div(ticks, kbdev->js_data.scheduling_period_ns); + kbdev->js_reset_ticks_dumping = ticks; + } else { + kbdev->js_reset_ticks_dumping = -1; + } + + kbdev->js_timeouts_updated = true; + + dev_dbg(kbdev->dev, "Overriding JS_SOFT_STOP_TICKS with %lu ticks (%lu ms)\n", + (unsigned long)kbdev->js_soft_stop_ticks, + js_soft_stop_ms); + dev_dbg(kbdev->dev, "Overriding JS_SOFT_STOP_TICKS_CL with %lu ticks (%lu ms)\n", + (unsigned long)kbdev->js_soft_stop_ticks_cl, + js_soft_stop_ms_cl); + dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_SS with %lu ticks (%lu ms)\n", + (unsigned long)kbdev->js_hard_stop_ticks_ss, + js_hard_stop_ms_ss); + dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_CL with %lu ticks (%lu ms)\n", + (unsigned long)kbdev->js_hard_stop_ticks_cl, + js_hard_stop_ms_cl); + dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_DUMPING with %lu ticks (%lu ms)\n", + (unsigned long) + kbdev->js_hard_stop_ticks_dumping, + js_hard_stop_ms_dumping); + dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_SS with %lu ticks (%lu ms)\n", + (unsigned long)kbdev->js_reset_ticks_ss, + js_reset_ms_ss); + dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_CL with %lu ticks (%lu ms)\n", + (unsigned long)kbdev->js_reset_ticks_cl, + js_reset_ms_cl); + dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_DUMPING with %lu ticks (%lu ms)\n", + (unsigned long)kbdev->js_reset_ticks_dumping, + js_reset_ms_dumping); return count; } @@ -2038,16 +2131,6 @@ static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr return -EINVAL; } -static unsigned long 
get_js_timeout_in_ms( - u32 scheduling_period_ns, - u32 ticks) -{ - u64 ms = (u64)ticks * scheduling_period_ns; - - do_div(ms, 1000000UL); - return ms; -} - /** Show callback for the @c js_timeouts sysfs file. * * This function is called to get the contents of the @c js_timeouts sysfs @@ -2064,6 +2147,7 @@ static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *att { struct kbase_device *kbdev; ssize_t ret; + u64 ms; unsigned long js_soft_stop_ms; unsigned long js_soft_stop_ms_cl; unsigned long js_hard_stop_ms_ss; @@ -2072,37 +2156,93 @@ static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *att unsigned long js_reset_ms_ss; unsigned long js_reset_ms_cl; unsigned long js_reset_ms_dumping; + unsigned long ticks; u32 scheduling_period_ns; kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - scheduling_period_ns = kbdev->js_data.scheduling_period_ns; + /* If no contexts have been scheduled since js_timeouts was last written + * to, the new timeouts might not have been latched yet. So check if an + * update is pending and use the new values if necessary. */ + if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns > 0) + scheduling_period_ns = kbdev->js_scheduling_period_ns; + else + scheduling_period_ns = kbdev->js_data.scheduling_period_ns; -#define GET_TIMEOUT(name) get_js_timeout_in_ms(\ - scheduling_period_ns, \ - kbdev->js_data.name) + if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks > 0) + ticks = kbdev->js_soft_stop_ticks; + else + ticks = kbdev->js_data.soft_stop_ticks; + ms = (u64)ticks * scheduling_period_ns; + do_div(ms, 1000000UL); + js_soft_stop_ms = (unsigned long)ms; - js_soft_stop_ms = GET_TIMEOUT(soft_stop_ticks); - js_soft_stop_ms_cl = GET_TIMEOUT(soft_stop_ticks_cl); - js_hard_stop_ms_ss = GET_TIMEOUT(hard_stop_ticks_ss); - js_hard_stop_ms_cl = GET_TIMEOUT(hard_stop_ticks_cl); - js_hard_stop_ms_dumping = GET_TIMEOUT(hard_stop_ticks_dumping); - js_reset_ms_ss = GET_TIMEOUT(gpu_reset_ticks_ss); - js_reset_ms_cl = GET_TIMEOUT(gpu_reset_ticks_cl); - js_reset_ms_dumping = GET_TIMEOUT(gpu_reset_ticks_dumping); + if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks_cl > 0) + ticks = kbdev->js_soft_stop_ticks_cl; + else + ticks = kbdev->js_data.soft_stop_ticks_cl; + ms = (u64)ticks * scheduling_period_ns; + do_div(ms, 1000000UL); + js_soft_stop_ms_cl = (unsigned long)ms; -#undef GET_TIMEOUT + if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_ss > 0) + ticks = kbdev->js_hard_stop_ticks_ss; + else + ticks = kbdev->js_data.hard_stop_ticks_ss; + ms = (u64)ticks * scheduling_period_ns; + do_div(ms, 1000000UL); + js_hard_stop_ms_ss = (unsigned long)ms; - ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n", - js_soft_stop_ms, js_soft_stop_ms_cl, - js_hard_stop_ms_ss, js_hard_stop_ms_cl, - js_hard_stop_ms_dumping, js_reset_ms_ss, - js_reset_ms_cl, js_reset_ms_dumping); + if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_cl > 0) + ticks = kbdev->js_hard_stop_ticks_cl; + else + ticks = kbdev->js_data.hard_stop_ticks_cl; + ms = (u64)ticks * scheduling_period_ns; + do_div(ms, 1000000UL); + js_hard_stop_ms_cl = (unsigned long)ms; - if (ret >= PAGE_SIZE) { - buf[PAGE_SIZE - 2] = '\n'; + if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_dumping > 0) + ticks = kbdev->js_hard_stop_ticks_dumping; + else + ticks = kbdev->js_data.hard_stop_ticks_dumping; + ms = (u64)ticks * scheduling_period_ns; + do_div(ms, 1000000UL); + js_hard_stop_ms_dumping = (unsigned long)ms; + + if 
(kbdev->js_timeouts_updated && kbdev->js_reset_ticks_ss > 0) + ticks = kbdev->js_reset_ticks_ss; + else + ticks = kbdev->js_data.gpu_reset_ticks_ss; + ms = (u64)ticks * scheduling_period_ns; + do_div(ms, 1000000UL); + js_reset_ms_ss = (unsigned long)ms; + + if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_cl > 0) + ticks = kbdev->js_reset_ticks_cl; + else + ticks = kbdev->js_data.gpu_reset_ticks_cl; + ms = (u64)ticks * scheduling_period_ns; + do_div(ms, 1000000UL); + js_reset_ms_cl = (unsigned long)ms; + + if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_dumping > 0) + ticks = kbdev->js_reset_ticks_dumping; + else + ticks = kbdev->js_data.gpu_reset_ticks_dumping; + ms = (u64)ticks * scheduling_period_ns; + do_div(ms, 1000000UL); + js_reset_ms_dumping = (unsigned long)ms; + + ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n", + js_soft_stop_ms, js_soft_stop_ms_cl, + js_hard_stop_ms_ss, js_hard_stop_ms_cl, + js_hard_stop_ms_dumping, js_reset_ms_ss, + js_reset_ms_cl, js_reset_ms_dumping); + + if (ret >= PAGE_SIZE) { + buf[PAGE_SIZE - 2] = '\n'; buf[PAGE_SIZE - 1] = '\0'; ret = PAGE_SIZE - 1; } @@ -2124,16 +2264,6 @@ static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *att */ static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts); -static u32 get_new_js_timeout( - u32 old_period, - u32 old_ticks, - u32 new_scheduling_period_ns) -{ - u64 ticks = (u64)old_period * (u64)old_ticks; - do_div(ticks, new_scheduling_period_ns); - return ticks?ticks:1; -} - /** * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs * file @@ -2156,15 +2286,12 @@ static ssize_t set_js_scheduling_period(struct device *dev, unsigned int js_scheduling_period; u32 new_scheduling_period_ns; u32 old_period; - struct kbasep_js_device_data *js_data; - unsigned long flags; + u64 ticks; kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - js_data = &kbdev->js_data; - ret = kstrtouint(buf, 0, &js_scheduling_period); if (ret || !js_scheduling_period) { dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n" @@ -2175,39 +2302,86 @@ static ssize_t set_js_scheduling_period(struct device *dev, new_scheduling_period_ns = js_scheduling_period * 1000000; /* Update scheduling timeouts */ - mutex_lock(&js_data->runpool_mutex); - spin_lock_irqsave(&js_data->runpool_irq.lock, flags); + mutex_lock(&kbdev->js_data.runpool_mutex); /* If no contexts have been scheduled since js_timeouts was last written * to, the new timeouts might not have been latched yet. So check if an * update is pending and use the new values if necessary. */ /* Use previous 'new' scheduling period as a base if present. */ - old_period = js_data->scheduling_period_ns; + if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns) + old_period = kbdev->js_scheduling_period_ns; + else + old_period = kbdev->js_data.scheduling_period_ns; + + if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks > 0) + ticks = (u64)kbdev->js_soft_stop_ticks * old_period; + else + ticks = (u64)kbdev->js_data.soft_stop_ticks * + kbdev->js_data.scheduling_period_ns; + do_div(ticks, new_scheduling_period_ns); + kbdev->js_soft_stop_ticks = ticks ? 
ticks : 1; + + if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks_cl > 0) + ticks = (u64)kbdev->js_soft_stop_ticks_cl * old_period; + else + ticks = (u64)kbdev->js_data.soft_stop_ticks_cl * + kbdev->js_data.scheduling_period_ns; + do_div(ticks, new_scheduling_period_ns); + kbdev->js_soft_stop_ticks_cl = ticks ? ticks : 1; -#define SET_TIMEOUT(name) \ - (js_data->name = get_new_js_timeout(\ - old_period, \ - kbdev->js_data.name, \ - new_scheduling_period_ns)) + if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_ss > 0) + ticks = (u64)kbdev->js_hard_stop_ticks_ss * old_period; + else + ticks = (u64)kbdev->js_data.hard_stop_ticks_ss * + kbdev->js_data.scheduling_period_ns; + do_div(ticks, new_scheduling_period_ns); + kbdev->js_hard_stop_ticks_ss = ticks ? ticks : 1; + + if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_cl > 0) + ticks = (u64)kbdev->js_hard_stop_ticks_cl * old_period; + else + ticks = (u64)kbdev->js_data.hard_stop_ticks_cl * + kbdev->js_data.scheduling_period_ns; + do_div(ticks, new_scheduling_period_ns); + kbdev->js_hard_stop_ticks_cl = ticks ? ticks : 1; + + if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_dumping > 0) + ticks = (u64)kbdev->js_hard_stop_ticks_dumping * old_period; + else + ticks = (u64)kbdev->js_data.hard_stop_ticks_dumping * + kbdev->js_data.scheduling_period_ns; + do_div(ticks, new_scheduling_period_ns); + kbdev->js_hard_stop_ticks_dumping = ticks ? ticks : 1; - SET_TIMEOUT(soft_stop_ticks); - SET_TIMEOUT(soft_stop_ticks_cl); - SET_TIMEOUT(hard_stop_ticks_ss); - SET_TIMEOUT(hard_stop_ticks_cl); - SET_TIMEOUT(hard_stop_ticks_dumping); - SET_TIMEOUT(gpu_reset_ticks_ss); - SET_TIMEOUT(gpu_reset_ticks_cl); - SET_TIMEOUT(gpu_reset_ticks_dumping); + if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_ss > 0) + ticks = (u64)kbdev->js_reset_ticks_ss * old_period; + else + ticks = (u64)kbdev->js_data.gpu_reset_ticks_ss * + kbdev->js_data.scheduling_period_ns; + do_div(ticks, new_scheduling_period_ns); + kbdev->js_reset_ticks_ss = ticks ? ticks : 1; -#undef SET_TIMEOUT + if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_cl > 0) + ticks = (u64)kbdev->js_reset_ticks_cl * old_period; + else + ticks = (u64)kbdev->js_data.gpu_reset_ticks_cl * + kbdev->js_data.scheduling_period_ns; + do_div(ticks, new_scheduling_period_ns); + kbdev->js_reset_ticks_cl = ticks ? ticks : 1; - js_data->scheduling_period_ns = new_scheduling_period_ns; + if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_dumping > 0) + ticks = (u64)kbdev->js_reset_ticks_dumping * old_period; + else + ticks = (u64)kbdev->js_data.gpu_reset_ticks_dumping * + kbdev->js_data.scheduling_period_ns; + do_div(ticks, new_scheduling_period_ns); + kbdev->js_reset_ticks_dumping = ticks ? 
ticks : 1; - kbase_js_set_timeouts(kbdev); + kbdev->js_scheduling_period_ns = new_scheduling_period_ns; + kbdev->js_timeouts_updated = true; - spin_unlock_irqrestore(&js_data->runpool_irq.lock, flags); - mutex_unlock(&js_data->runpool_mutex); + mutex_unlock(&kbdev->js_data.runpool_mutex); dev_dbg(kbdev->dev, "JS scheduling period: %dms\n", js_scheduling_period); @@ -2238,7 +2412,10 @@ static ssize_t show_js_scheduling_period(struct device *dev, if (!kbdev) return -ENODEV; - period = kbdev->js_data.scheduling_period_ns; + if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns > 0) + period = kbdev->js_scheduling_period_ns; + else + period = kbdev->js_data.scheduling_period_ns; ret = scnprintf(buf, PAGE_SIZE, "%d\n", period / 1000000); @@ -2538,8 +2715,6 @@ static ssize_t kbase_show_gpuinfo(struct device *dev, { .id = GPU_ID_PI_T83X, .name = "Mali-T83x" }, { .id = GPU_ID_PI_T86X, .name = "Mali-T86x" }, { .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" }, - { .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-G71" }, }; const char *product_name = "(Unknown Mali GPU)"; struct kbase_device *kbdev; @@ -2867,42 +3042,26 @@ static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size, set_mem_pool_max_size); -static int kbasep_protected_mode_enter(struct kbase_device *kbdev) -{ - kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), - GPU_COMMAND_SET_PROTECTED_MODE, NULL); - return 0; -} -static bool kbasep_protected_mode_supported(struct kbase_device *kbdev) +static int kbasep_secure_mode_init(struct kbase_device *kbdev) { - return true; -} -static struct kbase_protected_ops kbasep_protected_ops = { - .protected_mode_enter = kbasep_protected_mode_enter, - .protected_mode_reset = NULL, - .protected_mode_supported = kbasep_protected_mode_supported, -}; +#ifdef SECURE_CALLBACKS + kbdev->secure_ops = SECURE_CALLBACKS; + kbdev->secure_mode_support = false; -static void kbasep_protected_mode_init(struct kbase_device *kbdev) -{ - kbdev->protected_ops = NULL; + if (kbdev->secure_ops) { + int err; + + /* Make sure secure mode is disabled on startup */ + err = kbdev->secure_ops->secure_mode_disable(kbdev); - if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) { - /* Use native protected ops */ - kbdev->protected_ops = &kbasep_protected_ops; + /* secure_mode_disable() returns -EINVAL if not supported */ + kbdev->secure_mode_support = (err != -EINVAL); } -#ifdef PROTECTED_CALLBACKS - else - kbdev->protected_ops = PROTECTED_CALLBACKS; #endif - if (kbdev->protected_ops) - kbdev->protected_mode_support = - kbdev->protected_ops->protected_mode_supported(kbdev); - else - kbdev->protected_mode_support = false; + return 0; } #ifdef CONFIG_MALI_NO_MALI @@ -2941,146 +3100,11 @@ static int kbase_common_reg_map(struct kbase_device *kbdev) static void kbase_common_reg_unmap(struct kbase_device * const kbdev) { - if (kbdev->reg) { - iounmap(kbdev->reg); - release_mem_region(kbdev->reg_start, kbdev->reg_size); - kbdev->reg = NULL; - kbdev->reg_start = 0; - kbdev->reg_size = 0; - } + iounmap(kbdev->reg); + release_mem_region(kbdev->reg_start, kbdev->reg_size); } #endif /* CONFIG_MALI_NO_MALI */ -static int registers_map(struct kbase_device * const kbdev) -{ - - /* the first memory resource is the physical address of the GPU - * registers */ - struct platform_device *pdev = to_platform_device(kbdev->dev); - struct resource *reg_res; - int err; - - reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!reg_res) { - dev_err(kbdev->dev, "Invalid register 
resource\n"); - return -ENOENT; - } - - kbdev->reg_start = reg_res->start; - kbdev->reg_size = resource_size(reg_res); - - err = kbase_common_reg_map(kbdev); - if (err) { - dev_err(kbdev->dev, "Failed to map registers\n"); - return err; - } - - return 0; -} - -static void registers_unmap(struct kbase_device *kbdev) -{ - kbase_common_reg_unmap(kbdev); -} - -static int power_control_init(struct platform_device *pdev) -{ - struct kbase_device *kbdev = to_kbase_device(&pdev->dev); - int err = 0; - - if (!kbdev) - return -ENODEV; - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ - && defined(CONFIG_REGULATOR) - kbdev->regulator = regulator_get_optional(kbdev->dev, "mali"); - if (IS_ERR_OR_NULL(kbdev->regulator)) { - err = PTR_ERR(kbdev->regulator); - kbdev->regulator = NULL; - if (err == -EPROBE_DEFER) { - dev_err(&pdev->dev, "Failed to get regulator\n"); - return err; - } - dev_info(kbdev->dev, - "Continuing without Mali regulator control\n"); - /* Allow probe to continue without regulator */ - } -#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ - - kbdev->clock = clk_get(kbdev->dev, "clk_mali"); - if (IS_ERR_OR_NULL(kbdev->clock)) { - err = PTR_ERR(kbdev->clock); - kbdev->clock = NULL; - if (err == -EPROBE_DEFER) { - dev_err(&pdev->dev, "Failed to get clock\n"); - goto fail; - } - dev_info(kbdev->dev, "Continuing without Mali clock control\n"); - /* Allow probe to continue without clock. */ - } else { - err = clk_prepare(kbdev->clock); - if (err) { - dev_err(kbdev->dev, - "Failed to prepare and enable clock (%d)\n", - err); - goto fail; - } - } - -#if defined(CONFIG_OF) && defined(CONFIG_PM_OPP) - /* Register the OPPs if they are available in device tree */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) - err = dev_pm_opp_of_add_table(kbdev->dev); -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) - err = of_init_opp_table(kbdev->dev); -#else - err = 0; -#endif /* LINUX_VERSION_CODE */ - if (err) - dev_dbg(kbdev->dev, "OPP table not found\n"); -#endif /* CONFIG_OF && CONFIG_PM_OPP */ - - return 0; - -fail: - -if (kbdev->clock != NULL) { - clk_put(kbdev->clock); - kbdev->clock = NULL; -} - -#ifdef CONFIG_REGULATOR - if (NULL != kbdev->regulator) { - regulator_put(kbdev->regulator); - kbdev->regulator = NULL; - } -#endif - - return err; -} - -static void power_control_term(struct kbase_device *kbdev) -{ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) - dev_pm_opp_of_remove_table(kbdev->dev); -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) - of_free_opp_table(kbdev->dev); -#endif - - if (kbdev->clock) { - clk_unprepare(kbdev->clock); - clk_put(kbdev->clock); - kbdev->clock = NULL; - } - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ - && defined(CONFIG_REGULATOR) - if (kbdev->regulator) { - regulator_put(kbdev->regulator); - kbdev->regulator = NULL; - } -#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ -} #ifdef CONFIG_DEBUG_FS @@ -3121,6 +3145,30 @@ MAKE_QUIRK_ACCESSORS(mmu); #endif /* KBASE_GPU_RESET_EN */ +static int kbasep_secure_mode_seq_show(struct seq_file *m, void *p) +{ + struct kbase_device *kbdev = m->private; + + if (!kbdev->secure_mode_support) + seq_puts(m, "unsupported\n"); + else + seq_printf(m, "%s\n", kbdev->secure_mode ? 
"Y" : "N"); + + return 0; +} + +static int kbasep_secure_mode_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbasep_secure_mode_seq_show, in->i_private); +} + +static const struct file_operations kbasep_secure_mode_debugfs_fops = { + .open = kbasep_secure_mode_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int kbase_device_debugfs_init(struct kbase_device *kbdev) { struct dentry *debugfs_ctx_defaults_directory; @@ -3156,7 +3204,6 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) kbase_debug_job_fault_debugfs_init(kbdev); kbasep_gpu_memory_debugfs_init(kbdev); - kbase_as_fault_debugfs_init(kbdev); #if KBASE_GPU_RESET_EN debugfs_create_file("quirks_sc", 0644, kbdev->mali_debugfs_directory, kbdev, @@ -3187,6 +3234,10 @@ static int kbase_device_debugfs_init(struct kbase_device *kbdev) kbasep_trace_timeline_debugfs_init(kbdev); #endif /* CONFIG_MALI_TRACE_TIMELINE */ + debugfs_create_file("secure_mode", S_IRUGO, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_secure_mode_debugfs_fops); + return 0; out: @@ -3265,412 +3316,535 @@ static void kbase_logging_started_cb(void *data) } #endif -static struct attribute *kbase_attrs[] = { -#ifdef CONFIG_MALI_DEBUG - &dev_attr_debug_command.attr, - &dev_attr_js_softstop_always.attr, -#endif -#if !MALI_CUSTOMER_RELEASE - &dev_attr_force_replay.attr, -#endif - &dev_attr_js_timeouts.attr, - &dev_attr_soft_job_timeout.attr, - &dev_attr_gpuinfo.attr, - &dev_attr_dvfs_period.attr, - &dev_attr_pm_poweroff.attr, - &dev_attr_reset_timeout.attr, - &dev_attr_js_scheduling_period.attr, - &dev_attr_power_policy.attr, - &dev_attr_core_availability_policy.attr, - &dev_attr_core_mask.attr, - &dev_attr_mem_pool_size.attr, - &dev_attr_mem_pool_max_size.attr, - NULL -}; - -static const struct attribute_group kbase_attr_group = { - .attrs = kbase_attrs, -}; -static int kbase_platform_device_remove(struct platform_device *pdev) +static int kbase_common_device_init(struct kbase_device *kbdev) { - struct kbase_device *kbdev = to_kbase_device(&pdev->dev); - const struct list_head *dev_list; + int err; + struct mali_base_gpu_core_props *core_props; + enum { + inited_mem = (1u << 0), + inited_js = (1u << 1), + inited_pm_runtime_init = (1u << 6), +#ifdef CONFIG_MALI_DEVFREQ + inited_devfreq = (1u << 9), +#endif /* CONFIG_MALI_DEVFREQ */ +#ifdef CONFIG_MALI_MIPE_ENABLED + inited_tlstream = (1u << 10), +#endif /* CONFIG_MALI_MIPE_ENABLED */ + inited_backend_early = (1u << 11), + inited_backend_late = (1u << 12), + inited_device = (1u << 13), + inited_vinstr = (1u << 19), + inited_ipa = (1u << 20), + inited_job_fault = (1u << 21) + }; - if (!kbdev) - return -ENODEV; + int inited = 0; + u32 gpu_id; +#if defined(CONFIG_MALI_PLATFORM_VEXPRESS) + u32 ve_logic_tile = 0; +#endif /* CONFIG_MALI_PLATFORM_VEXPRESS */ -#ifdef CONFIG_MALI_FPGA_BUS_LOGGER - if (kbdev->inited_subsys & inited_buslogger) { - bl_core_client_unregister(kbdev->buslogger); - kbdev->inited_subsys &= ~inited_buslogger; - } -#endif + dev_set_drvdata(kbdev->dev, kbdev); - if (kbdev->inited_subsys & inited_sysfs_group) { - sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); - kbdev->inited_subsys &= ~inited_sysfs_group; - } + err = kbase_backend_early_init(kbdev); + if (err) + goto out_partial; + inited |= inited_backend_early; - if (kbdev->inited_subsys & inited_dev_list) { - dev_list = kbase_dev_list_get(); - list_del(&kbdev->entry); - kbase_dev_list_put(dev_list); - kbdev->inited_subsys &= ~inited_dev_list; - } + 
scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, + kbase_dev_nr); - if (kbdev->inited_subsys & inited_misc_register) { - misc_deregister(&kbdev->mdev); - kbdev->inited_subsys &= ~inited_misc_register; - } + kbase_disjoint_init(kbdev); - if (kbdev->inited_subsys & inited_get_device) { - put_device(kbdev->dev); - kbdev->inited_subsys &= ~inited_get_device; - } + /* obtain min/max configured gpu frequencies */ + core_props = &(kbdev->gpu_props.props.core_props); - if (kbdev->inited_subsys & inited_debugfs) { - kbase_device_debugfs_term(kbdev); - kbdev->inited_subsys &= ~inited_debugfs; + /* For versatile express platforms, min and max values of GPU frequency + * depend on the type of the logic tile; these values may not be known + * at the build time so in some cases a platform config file with wrong + * GPU freguency values may be included; to ensure the correct value of + * min and max GPU frequency is obtained, the type of the logic tile is + * read from the corresponding register on the platform and frequency + * values assigned accordingly.*/ +#if defined(CONFIG_MALI_PLATFORM_VEXPRESS) + ve_logic_tile = kbase_get_platform_logic_tile_type(); + + switch (ve_logic_tile) { + case 0x217: + /* Virtex 6, HBI0217 */ + core_props->gpu_freq_khz_min = VE_VIRTEX6_GPU_FREQ_MIN; + core_props->gpu_freq_khz_max = VE_VIRTEX6_GPU_FREQ_MAX; + break; + case 0x247: + /* Virtex 7, HBI0247 */ + core_props->gpu_freq_khz_min = VE_VIRTEX7_GPU_FREQ_MIN; + core_props->gpu_freq_khz_max = VE_VIRTEX7_GPU_FREQ_MAX; + break; + default: + /* all other logic tiles, i.e., Virtex 5 HBI0192 + * or unsuccessful reading from the platform - + * fall back to the config_platform default */ + core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN; + core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; + break; } +#else + core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN; + core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; +#endif /* CONFIG_MALI_PLATFORM_VEXPRESS */ - if (kbdev->inited_subsys & inited_job_fault) { - kbase_debug_job_fault_dev_term(kbdev); - kbdev->inited_subsys &= ~inited_job_fault; - } + kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US; -#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY - if (kbdev->inited_subsys & inited_ipa) { - kbase_ipa_term(kbdev->ipa_ctx); - kbdev->inited_subsys &= ~inited_ipa; + err = kbase_device_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Can't initialize device (%d)\n", err); + goto out_partial; } -#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */ - if (kbdev->inited_subsys & inited_vinstr) { - kbase_vinstr_term(kbdev->vinstr_ctx); - kbdev->inited_subsys &= ~inited_vinstr; - } + inited |= inited_device; -#ifdef CONFIG_MALI_DEVFREQ - if (kbdev->inited_subsys & inited_devfreq) { - kbase_devfreq_term(kbdev); - kbdev->inited_subsys &= ~inited_devfreq; - } -#endif + if (kbdev->pm.callback_power_runtime_init) { + err = kbdev->pm.callback_power_runtime_init(kbdev); + if (err) + goto out_partial; - if (kbdev->inited_subsys & inited_backend_late) { - kbase_backend_late_term(kbdev); - kbdev->inited_subsys &= ~inited_backend_late; + inited |= inited_pm_runtime_init; } - if (kbdev->inited_subsys & inited_tlstream) { - kbase_tlstream_term(); - kbdev->inited_subsys &= ~inited_tlstream; - } + err = kbase_mem_init(kbdev); + if (err) + goto out_partial; - /* Bring job and mem sys to a halt before we continue termination */ + inited |= inited_mem; - if (kbdev->inited_subsys & inited_js) - kbasep_js_devdata_halt(kbdev); + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + gpu_id &= 
GPU_ID_VERSION_PRODUCT_ID; + gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; - if (kbdev->inited_subsys & inited_mem) - kbase_mem_halt(kbdev); + kbase_device_coherency_init(kbdev, gpu_id); - if (kbdev->inited_subsys & inited_js) { - kbasep_js_devdata_term(kbdev); - kbdev->inited_subsys &= ~inited_js; - } + err = kbasep_secure_mode_init(kbdev); + if (err) + goto out_partial; - if (kbdev->inited_subsys & inited_mem) { - kbase_mem_term(kbdev); - kbdev->inited_subsys &= ~inited_mem; - } + err = kbasep_js_devdata_init(kbdev); + if (err) + goto out_partial; - if (kbdev->inited_subsys & inited_pm_runtime_init) { - kbdev->pm.callback_power_runtime_term(kbdev); - kbdev->inited_subsys &= ~inited_pm_runtime_init; - } + inited |= inited_js; - if (kbdev->inited_subsys & inited_device) { - kbase_device_term(kbdev); - kbdev->inited_subsys &= ~inited_device; +#ifdef CONFIG_MALI_MIPE_ENABLED + err = kbase_tlstream_init(); + if (err) { + dev_err(kbdev->dev, "Couldn't initialize timeline stream\n"); + goto out_partial; } + inited |= inited_tlstream; +#endif /* CONFIG_MALI_MIPE_ENABLED */ - if (kbdev->inited_subsys & inited_backend_early) { - kbase_backend_early_term(kbdev); - kbdev->inited_subsys &= ~inited_backend_early; + err = kbase_backend_late_init(kbdev); + if (err) + goto out_partial; + inited |= inited_backend_late; + +#ifdef CONFIG_MALI_DEVFREQ + err = kbase_devfreq_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Couldn't initialize devfreq\n"); + goto out_partial; } + inited |= inited_devfreq; +#endif /* CONFIG_MALI_DEVFREQ */ - if (kbdev->inited_subsys & inited_power_control) { - power_control_term(kbdev); - kbdev->inited_subsys &= ~inited_power_control; + kbdev->vinstr_ctx = kbase_vinstr_init(kbdev); + if (!kbdev->vinstr_ctx) { + dev_err(kbdev->dev, "Can't initialize virtual instrumentation core\n"); + goto out_partial; } - if (kbdev->inited_subsys & inited_registers_map) { - registers_unmap(kbdev); - kbdev->inited_subsys &= ~inited_registers_map; + inited |= inited_vinstr; + + kbdev->ipa_ctx = kbase_ipa_init(kbdev); + if (!kbdev->ipa_ctx) { + dev_err(kbdev->dev, "Can't initialize IPA\n"); + goto out_partial; } -#ifdef CONFIG_MALI_NO_MALI - if (kbdev->inited_subsys & inited_gpu_device) { - gpu_device_destroy(kbdev); - kbdev->inited_subsys &= ~inited_gpu_device; + inited |= inited_ipa; + + err = kbase_debug_job_fault_dev_init(kbdev); + if (err) + goto out_partial; + + inited |= inited_job_fault; + + err = kbase_device_debugfs_init(kbdev); + if (err) + goto out_partial; + + /* intialise the kctx list */ + mutex_init(&kbdev->kctx_list_lock); + INIT_LIST_HEAD(&kbdev->kctx_list); + + kbdev->mdev.minor = MISC_DYNAMIC_MINOR; + kbdev->mdev.name = kbdev->devname; + kbdev->mdev.fops = &kbase_fops; + kbdev->mdev.parent = get_device(kbdev->dev); + + err = misc_register(&kbdev->mdev); + if (err) { + dev_err(kbdev->dev, "Couldn't register misc dev %s\n", kbdev->devname); + goto out_misc; } -#endif /* CONFIG_MALI_NO_MALI */ - if (kbdev->inited_subsys != 0) - dev_err(kbdev->dev, "Missing sub system termination\n"); + { + const struct list_head *dev_list = kbase_dev_list_get(); - kbase_device_free(kbdev); + list_add(&kbdev->entry, &kbase_dev_list); + kbase_dev_list_put(dev_list); + } + + dev_info(kbdev->dev, "Probed as %s\n", dev_name(kbdev->mdev.this_device)); + + kbase_dev_nr++; return 0; -} -extern void kbase_platform_rk_shutdown(struct kbase_device *kbdev); -static void kbase_platform_device_shutdown(struct platform_device *pdev) -{ - struct kbase_device *kbdev = to_kbase_device(&pdev->dev); +out_misc: + 
put_device(kbdev->dev); + kbase_device_debugfs_term(kbdev); +out_partial: + if (inited & inited_job_fault) + kbase_debug_job_fault_dev_term(kbdev); + if (inited & inited_ipa) + kbase_ipa_term(kbdev->ipa_ctx); + if (inited & inited_vinstr) + kbase_vinstr_term(kbdev->vinstr_ctx); +#ifdef CONFIG_MALI_DEVFREQ + if (inited & inited_devfreq) + kbase_devfreq_term(kbdev); +#endif /* CONFIG_MALI_DEVFREQ */ + if (inited & inited_backend_late) + kbase_backend_late_term(kbdev); +#ifdef CONFIG_MALI_MIPE_ENABLED + if (inited & inited_tlstream) + kbase_tlstream_term(); +#endif /* CONFIG_MALI_MIPE_ENABLED */ - kbase_platform_rk_shutdown(kbdev); + if (inited & inited_js) + kbasep_js_devdata_halt(kbdev); + + if (inited & inited_mem) + kbase_mem_halt(kbdev); + + if (inited & inited_js) + kbasep_js_devdata_term(kbdev); + + if (inited & inited_mem) + kbase_mem_term(kbdev); + + if (inited & inited_pm_runtime_init) { + if (kbdev->pm.callback_power_runtime_term) + kbdev->pm.callback_power_runtime_term(kbdev); + } + + if (inited & inited_device) + kbase_device_term(kbdev); + + if (inited & inited_backend_early) + kbase_backend_early_term(kbdev); + + return err; } + +static struct attribute *kbase_attrs[] = { +#ifdef CONFIG_MALI_DEBUG + &dev_attr_debug_command.attr, + &dev_attr_js_softstop_always.attr, +#endif +#if !MALI_CUSTOMER_RELEASE + &dev_attr_force_replay.attr, +#endif + &dev_attr_js_timeouts.attr, + &dev_attr_gpuinfo.attr, + &dev_attr_dvfs_period.attr, + &dev_attr_pm_poweroff.attr, + &dev_attr_reset_timeout.attr, + &dev_attr_js_scheduling_period.attr, + &dev_attr_power_policy.attr, + &dev_attr_core_availability_policy.attr, + &dev_attr_core_mask.attr, + &dev_attr_mem_pool_size.attr, + &dev_attr_mem_pool_max_size.attr, + NULL +}; + +static const struct attribute_group kbase_attr_group = { + .attrs = kbase_attrs, +}; + +static int kbase_common_device_remove(struct kbase_device *kbdev); + static int kbase_platform_device_probe(struct platform_device *pdev) { struct kbase_device *kbdev; - struct mali_base_gpu_core_props *core_props; - u32 gpu_id; - const struct list_head *dev_list; + struct resource *reg_res; int err = 0; + int i; #ifdef CONFIG_OF err = kbase_platform_early_init(); if (err) { dev_err(&pdev->dev, "Early platform initialization failed\n"); - kbase_platform_device_remove(pdev); return err; } #endif kbdev = kbase_device_alloc(); if (!kbdev) { - dev_err(&pdev->dev, "Allocate device failed\n"); - kbase_platform_device_remove(pdev); - return -ENOMEM; + dev_err(&pdev->dev, "Can't allocate device\n"); + err = -ENOMEM; + goto out; } - - kbdev->dev = &pdev->dev; - dev_set_drvdata(kbdev->dev, kbdev); - #ifdef CONFIG_MALI_NO_MALI err = gpu_device_create(kbdev); if (err) { - dev_err(&pdev->dev, "Dummy model initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; + dev_err(&pdev->dev, "Can't initialize dummy model\n"); + goto out_midg; } - kbdev->inited_subsys |= inited_gpu_device; #endif /* CONFIG_MALI_NO_MALI */ - err = assign_irqs(pdev); - if (err) { - dev_err(&pdev->dev, "IRQ search failed\n"); - kbase_platform_device_remove(pdev); - return err; - } + kbdev->dev = &pdev->dev; + /* 3 IRQ resources */ + for (i = 0; i < 3; i++) { + struct resource *irq_res; + int irqtag; - err = registers_map(kbdev); - if (err) { - dev_err(&pdev->dev, "Register map failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_registers_map; + irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i); + if (!irq_res) { + dev_err(kbdev->dev, "No IRQ resource at index 
%d\n", i); + err = -ENOENT; + goto out_platform_irq; + } - err = power_control_init(pdev); - if (err) { - dev_err(&pdev->dev, "Power control initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; +#ifdef CONFIG_OF + if (!strcmp(irq_res->name, "JOB")) { + irqtag = JOB_IRQ_TAG; + } else if (!strcmp(irq_res->name, "MMU")) { + irqtag = MMU_IRQ_TAG; + } else if (!strcmp(irq_res->name, "GPU")) { + irqtag = GPU_IRQ_TAG; + } else { + dev_err(&pdev->dev, "Invalid irq res name: '%s'\n", + irq_res->name); + err = -EINVAL; + goto out_irq_name; + } +#else + irqtag = i; +#endif /* CONFIG_OF */ + kbdev->irqs[irqtag].irq = irq_res->start; + kbdev->irqs[irqtag].flags = (irq_res->flags & IRQF_TRIGGER_MASK); } - kbdev->inited_subsys |= inited_power_control; + /* the first memory resource is the physical address of the GPU + * registers */ + reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!reg_res) { + dev_err(kbdev->dev, "Invalid register resource\n"); + err = -ENOENT; + goto out_platform_mem; + } - err = kbase_backend_early_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Early backend initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_backend_early; + kbdev->reg_start = reg_res->start; + kbdev->reg_size = resource_size(reg_res); - scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, - kbase_dev_nr); + err = kbase_common_reg_map(kbdev); + if (err) + goto out_reg_map; - kbase_disjoint_init(kbdev); +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ + && defined(CONFIG_REGULATOR) + kbdev->regulator = regulator_get_optional(kbdev->dev, "mali"); + if (IS_ERR_OR_NULL(kbdev->regulator)) { + err = PTR_ERR(kbdev->regulator); - /* obtain min/max configured gpu frequencies */ - core_props = &(kbdev->gpu_props.props.core_props); - core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN; - core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX; + kbdev->regulator = NULL; + if (err == -EPROBE_DEFER) + goto out_regulator; + dev_info(kbdev->dev, "Continuing without Mali regulator control\n"); + /* Allow probe to continue without regulator */ + } +#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ - kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US; +#ifdef CONFIG_MALI_PLATFORM_DEVICETREE + pm_runtime_enable(kbdev->dev); +#endif - err = kbase_device_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Device initialization failed (%d)\n", err); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_device; + kbdev->clock = clk_get(kbdev->dev, "clk_mali"); + if (IS_ERR_OR_NULL(kbdev->clock)) { + err = PTR_ERR(kbdev->clock); - if (kbdev->pm.callback_power_runtime_init) { - err = kbdev->pm.callback_power_runtime_init(kbdev); + kbdev->clock = NULL; + if (err == -EPROBE_DEFER) + goto out_clock_prepare; + dev_info(kbdev->dev, "Continuing without Mali clock control\n"); + /* Allow probe to continue without clock. 
*/ + } else { + err = clk_prepare_enable(kbdev->clock); if (err) { dev_err(kbdev->dev, - "Runtime PM initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; + "Failed to prepare and enable clock (%d)\n", err); + goto out_clock_prepare; } - kbdev->inited_subsys |= inited_pm_runtime_init; } - err = kbase_mem_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Memory subsystem initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_mem; - - gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; - gpu_id &= GPU_ID_VERSION_PRODUCT_ID; - gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; - - kbase_device_coherency_init(kbdev, gpu_id); +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) && defined(CONFIG_OF) \ + && defined(CONFIG_PM_OPP) + /* Register the OPPs if they are available in device tree */ + if (dev_pm_opp_of_add_table(kbdev->dev) < 0) + dev_dbg(kbdev->dev, "OPP table not found\n"); +#endif - kbasep_protected_mode_init(kbdev); - err = kbasep_js_devdata_init(kbdev); + err = kbase_common_device_init(kbdev); if (err) { - dev_err(kbdev->dev, "Job JS devdata initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; + dev_err(kbdev->dev, "Failed kbase_common_device_init\n"); + goto out_common_init; } - kbdev->inited_subsys |= inited_js; - err = kbase_tlstream_init(); + err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); if (err) { - dev_err(kbdev->dev, "Timeline stream initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; + dev_err(&pdev->dev, "Failed to create sysfs entries\n"); + goto out_sysfs; } - kbdev->inited_subsys |= inited_tlstream; - err = kbase_backend_late_init(kbdev); +#ifdef CONFIG_MALI_FPGA_BUS_LOGGER + err = bl_core_client_register(kbdev->devname, + kbase_logging_started_cb, + kbdev, &kbdev->buslogger, + THIS_MODULE, NULL); if (err) { - dev_err(kbdev->dev, "Late backend initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; + dev_err(kbdev->dev, "Couldn't register bus log client\n"); + goto out_bl_core_register; } - kbdev->inited_subsys |= inited_backend_late; -#ifdef CONFIG_MALI_DEVFREQ - err = kbase_devfreq_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Fevfreq initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_devfreq; -#endif /* CONFIG_MALI_DEVFREQ */ + bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024); +#endif + return 0; - kbdev->vinstr_ctx = kbase_vinstr_init(kbdev); - if (!kbdev->vinstr_ctx) { - dev_err(kbdev->dev, - "Virtual instrumentation initialization failed\n"); - kbase_platform_device_remove(pdev); - return -EINVAL; - } - kbdev->inited_subsys |= inited_vinstr; +#ifdef CONFIG_MALI_FPGA_BUS_LOGGER +out_bl_core_register: + sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); +#endif -#ifndef CONFIG_MALI_PRFCNT_SET_SECONDARY - kbdev->ipa_ctx = kbase_ipa_init(kbdev); - if (!kbdev->ipa_ctx) { - dev_err(kbdev->dev, "IPA initialization failed\n"); - kbase_platform_device_remove(pdev); - return -EINVAL; - } +out_sysfs: + kbase_common_device_remove(kbdev); +out_common_init: +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) + dev_pm_opp_of_remove_table(kbdev->dev); +#endif + clk_disable_unprepare(kbdev->clock); +out_clock_prepare: + clk_put(kbdev->clock); +#ifdef CONFIG_MALI_PLATFORM_DEVICETREE + pm_runtime_disable(kbdev->dev); +#endif +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ + && 
defined(CONFIG_REGULATOR) +out_regulator: + regulator_put(kbdev->regulator); +#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ + kbase_common_reg_unmap(kbdev); +out_reg_map: +out_platform_mem: +#ifdef CONFIG_OF +out_irq_name: +#endif +out_platform_irq: +#ifdef CONFIG_MALI_NO_MALI + gpu_device_destroy(kbdev); +out_midg: +#endif /* CONFIG_MALI_NO_MALI */ + kbase_device_free(kbdev); +out: + return err; +} - kbdev->inited_subsys |= inited_ipa; -#endif /* CONFIG_MALI_PRFCNT_SET_SECONDARY */ +static int kbase_common_device_remove(struct kbase_device *kbdev) +{ + kbase_debug_job_fault_dev_term(kbdev); + kbase_ipa_term(kbdev->ipa_ctx); + kbase_vinstr_term(kbdev->vinstr_ctx); + sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); - err = kbase_debug_job_fault_dev_init(kbdev); - if (err) { - dev_err(kbdev->dev, "Job fault debug initialization failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_job_fault; +#ifdef CONFIG_MALI_FPGA_BUS_LOGGER + if (kbdev->buslogger) + bl_core_client_unregister(kbdev->buslogger); +#endif - err = kbase_device_debugfs_init(kbdev); - if (err) { - dev_err(kbdev->dev, "DebugFS initialization failed"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_debugfs; +#ifdef CONFIG_DEBUG_FS + debugfs_remove_recursive(kbdev->mali_debugfs_directory); +#endif +#ifdef CONFIG_MALI_DEVFREQ + kbase_devfreq_term(kbdev); +#endif - /* initialize the kctx list */ - mutex_init(&kbdev->kctx_list_lock); - INIT_LIST_HEAD(&kbdev->kctx_list); + kbase_backend_late_term(kbdev); - kbdev->mdev.minor = MISC_DYNAMIC_MINOR; - kbdev->mdev.name = kbdev->devname; - kbdev->mdev.fops = &kbase_fops; - kbdev->mdev.parent = get_device(kbdev->dev); - kbdev->inited_subsys |= inited_get_device; + if (kbdev->pm.callback_power_runtime_term) + kbdev->pm.callback_power_runtime_term(kbdev); +#ifdef CONFIG_MALI_PLATFORM_DEVICETREE + pm_runtime_disable(kbdev->dev); +#endif - err = misc_register(&kbdev->mdev); - if (err) { - dev_err(kbdev->dev, "Misc device registration failed for %s\n", - kbdev->devname); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_misc_register; +#ifdef CONFIG_MALI_MIPE_ENABLED + kbase_tlstream_term(); +#endif /* CONFIG_MALI_MIPE_ENABLED */ - dev_list = kbase_dev_list_get(); - list_add(&kbdev->entry, &kbase_dev_list); - kbase_dev_list_put(dev_list); - kbdev->inited_subsys |= inited_dev_list; + kbasep_js_devdata_halt(kbdev); + kbase_mem_halt(kbdev); - err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); - if (err) { - dev_err(&pdev->dev, "SysFS group creation failed\n"); - kbase_platform_device_remove(pdev); - return err; - } - kbdev->inited_subsys |= inited_sysfs_group; + kbasep_js_devdata_term(kbdev); + kbase_mem_term(kbdev); + kbase_backend_early_term(kbdev); -#ifdef CONFIG_MALI_FPGA_BUS_LOGGER - err = bl_core_client_register(kbdev->devname, - kbase_logging_started_cb, - kbdev, &kbdev->buslogger, - THIS_MODULE, NULL); - if (err == 0) { - kbdev->inited_subsys |= inited_buslogger; - bl_core_set_threshold(kbdev->buslogger, 1024*1024*1024); - } else { - dev_warn(kbdev->dev, "Bus log client registration failed\n"); - err = 0; + { + const struct list_head *dev_list = kbase_dev_list_get(); + + list_del(&kbdev->entry); + kbase_dev_list_put(dev_list); } -#endif + misc_deregister(&kbdev->mdev); + put_device(kbdev->dev); + kbase_common_reg_unmap(kbdev); + kbase_device_term(kbdev); + if (kbdev->clock) { + clk_disable_unprepare(kbdev->clock); + clk_put(kbdev->clock); + 
kbdev->clock = NULL; + } +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ + && defined(CONFIG_REGULATOR) + regulator_put(kbdev->regulator); +#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ +#ifdef CONFIG_MALI_NO_MALI + gpu_device_destroy(kbdev); +#endif /* CONFIG_MALI_NO_MALI */ + kbase_device_free(kbdev); - dev_info(kbdev->dev, - "Probed as %s\n", dev_name(kbdev->mdev.this_device)); + return 0; +} - kbase_dev_nr++; +static int kbase_platform_device_remove(struct platform_device *pdev) +{ + struct kbase_device *kbdev = to_kbase_device(&pdev->dev); - return err; + if (!kbdev) + return -ENODEV; + + return kbase_common_device_remove(kbdev); } /** Suspend callback from the OS. @@ -3834,7 +4008,6 @@ MODULE_DEVICE_TABLE(of, kbase_dt_ids); static struct platform_driver kbase_platform_driver = { .probe = kbase_platform_device_probe, .remove = kbase_platform_device_remove, - .shutdown = kbase_platform_device_shutdown, .driver = { .name = kbase_drv_name, .owner = THIS_MODULE, @@ -3865,15 +4038,19 @@ static int __init kbase_driver_init(void) if (ret) return ret; +#ifndef CONFIG_MACH_MANTA #ifdef CONFIG_MALI_PLATFORM_FAKE ret = kbase_platform_fake_register(); if (ret) return ret; +#endif #endif ret = platform_driver_register(&kbase_platform_driver); +#ifndef CONFIG_MACH_MANTA #ifdef CONFIG_MALI_PLATFORM_FAKE if (ret) kbase_platform_fake_unregister(); +#endif #endif return ret; } @@ -3881,9 +4058,11 @@ static int __init kbase_driver_init(void) static void __exit kbase_driver_exit(void) { platform_driver_unregister(&kbase_platform_driver); +#ifndef CONFIG_MACH_MANTA #ifdef CONFIG_MALI_PLATFORM_FAKE kbase_platform_fake_unregister(); #endif +#endif } module_init(kbase_driver_init); @@ -3912,6 +4091,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages); EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use); EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released); EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_sw_counter); void kbase_trace_mali_pm_status(u32 event, u64 value) { diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c index 83c5c37942bd7..f3e426f9539b8 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c +++ b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. 
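
For reference, the js_timeouts and js_scheduling_period sysfs handlers above convert between milliseconds and scheduler ticks by scaling against scheduling_period_ns with do_div(). Below is a minimal sketch of that arithmetic on its own, assuming a kernel build environment; the helper names are invented for illustration and do not appear in this patch.

#include <linux/types.h>
#include <asm/div64.h>      /* do_div() */

/*
 * Illustrative only -- mirrors the conversions open-coded per field in the
 * js_timeouts store and show callbacks above.
 */
static u64 example_js_ms_to_ticks(u64 timeout_ms, u32 scheduling_period_ns)
{
    u64 ticks = timeout_ms * 1000000ULL;    /* ms -> ns */

    do_div(ticks, scheduling_period_ns);    /* ns -> scheduler ticks */
    return ticks;
}

static u64 example_js_ticks_to_ms(u32 ticks, u32 scheduling_period_ns)
{
    u64 ms = (u64)ticks * scheduling_period_ns; /* ticks -> ns */

    do_div(ms, 1000000UL);                      /* ns -> ms */
    return ms;
}

do_div() divides its 64-bit first argument in place and is used because a plain 64-by-32 division is not portable to 32-bit kernels; note that the js_scheduling_period store callback above additionally clamps each rescaled value with "ticks ? ticks : 1" so that no timeout collapses to zero ticks.
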
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -15,71 +15,51 @@ -#include -#include +#include "mali_kbase_debug_job_fault.h" #ifdef CONFIG_DEBUG_FS -static bool kbase_is_job_fault_event_pending(struct kbase_device *kbdev) +static bool kbase_is_job_fault_event_pending(struct list_head *event_list) { - struct list_head *event_list = &kbdev->job_fault_event_list; - unsigned long flags; - bool ret; + bool ret; - spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); - ret = !list_empty(event_list); - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + ret = (!list_empty(event_list)); return ret; } -static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx) +static bool kbase_ctx_has_no_event_pending( + struct kbase_context *kctx, struct list_head *event_list) { - struct kbase_device *kbdev = kctx->kbdev; - struct list_head *event_list = &kctx->kbdev->job_fault_event_list; struct base_job_fault_event *event; - unsigned long flags; - spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); - if (list_empty(event_list)) { - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); + if (list_empty(event_list)) return true; - } list_for_each_entry(event, event_list, head) { - if (event->katom->kctx == kctx) { - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, - flags); + if (event->katom->kctx == kctx) return false; - } } - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); - return true; + return false; } /* wait until the fault happen and copy the event */ static int kbase_job_fault_event_wait(struct kbase_device *kbdev, + struct list_head *event_list, struct base_job_fault_event *event) { - struct list_head *event_list = &kbdev->job_fault_event_list; struct base_job_fault_event *event_in; - unsigned long flags; - spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); if (list_empty(event_list)) { - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); if (wait_event_interruptible(kbdev->job_fault_wq, - kbase_is_job_fault_event_pending(kbdev))) + kbase_is_job_fault_event_pending(event_list))) return -ERESTARTSYS; - spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); } event_in = list_entry(event_list->next, struct base_job_fault_event, head); + event->event_code = event_in->event_code; event->katom = event_in->katom; - - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); - return 0; } @@ -122,16 +102,12 @@ static void kbase_job_fault_resume_event_cleanup(struct kbase_context *kctx) static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev) { struct list_head *event_list = &kbdev->job_fault_event_list; - unsigned long flags; - spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); while (!list_empty(event_list)) { + kbase_job_fault_event_dequeue(kbdev, event_list); - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); wake_up(&kbdev->job_fault_resume_wq); - spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); } - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); } static void kbase_job_fault_resume_worker(struct work_struct *data) @@ -153,7 +129,8 @@ static void kbase_job_fault_resume_worker(struct work_struct *data) * atoms belong to the same context. 
*/ wait_event(kctx->kbdev->job_fault_resume_wq, - kbase_ctx_has_no_event_pending(kctx)); + kbase_ctx_has_no_event_pending(kctx, + &kctx->kbdev->job_fault_event_list)); atomic_set(&kctx->job_fault_count, 0); kbase_jd_done_worker(&katom->work); @@ -189,12 +166,9 @@ static void kbase_job_fault_event_post(struct kbase_device *kbdev, struct kbase_jd_atom *katom, u32 completion_code) { struct base_job_fault_event *event; - unsigned long flags; - spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); event = kbase_job_fault_event_queue(&kbdev->job_fault_event_list, katom, completion_code); - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); wake_up_interruptible(&kbdev->job_fault_wq); @@ -319,10 +293,9 @@ static void *debug_job_fault_start(struct seq_file *m, loff_t *pos) */ if (*pos == 0) { event = kmalloc(sizeof(*event), GFP_KERNEL); - if (!event) - return NULL; event->reg_offset = 0; - if (kbase_job_fault_event_wait(kbdev, event)) { + if (kbase_job_fault_event_wait(kbdev, + &kbdev->job_fault_event_list, event)) { kfree(event); return NULL; } @@ -356,15 +329,11 @@ static void debug_job_fault_stop(struct seq_file *m, void *v) dev_info(kbdev->dev, "debug job fault seq stop stage 1"); } else { - unsigned long flags; - - spin_lock_irqsave(&kbdev->job_fault_event_lock, flags); if (!list_empty(&kbdev->job_fault_event_list)) { kbase_job_fault_event_dequeue(kbdev, &kbdev->job_fault_event_list); wake_up(&kbdev->job_fault_resume_wq); } - spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags); dev_info(kbdev->dev, "debug job fault seq stop stage 2"); } @@ -435,7 +404,6 @@ int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev) init_waitqueue_head(&(kbdev->job_fault_wq)); init_waitqueue_head(&(kbdev->job_fault_resume_wq)); - spin_lock_init(&kbdev->job_fault_event_lock); kbdev->job_fault_resume_workq = alloc_workqueue( "kbase_job_fault_resume_work_queue", WQ_MEM_RECLAIM, 1); diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h index a2bf8983c37c8..0930f905e4efc 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h +++ b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -18,6 +18,7 @@ #ifndef _KBASE_DEBUG_JOB_FAULT_H #define _KBASE_DEBUG_JOB_FAULT_H +#include #include #include diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c index a98355e33d073..42d1d832c0a39 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c +++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2013-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. 
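
The mali_kbase_debug_job_fault.c hunk above returns to a variant that walks and waits on the job-fault event list without the job_fault_event_lock spinlock that the removed lines take around every list access. The sketch below shows the general producer/consumer shape involved (post an event, wake the sleeping debugfs reader), with the locking kept; all names are invented for illustration and none of this is code from the driver.

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>

struct example_fault_event {
    struct list_head head;
    u32 event_code;
};

static LIST_HEAD(example_event_list);
static DEFINE_SPINLOCK(example_event_lock);
static DECLARE_WAIT_QUEUE_HEAD(example_event_wq);

/* Producer side: queue an event and wake any sleeping reader. */
static void example_post_event(u32 code)
{
    struct example_fault_event *event;
    unsigned long flags;

    event = kmalloc(sizeof(*event), GFP_ATOMIC);
    if (!event)
        return;
    event->event_code = code;

    spin_lock_irqsave(&example_event_lock, flags);
    list_add_tail(&event->head, &example_event_list);
    spin_unlock_irqrestore(&example_event_lock, flags);

    wake_up_interruptible(&example_event_wq);
}

static bool example_event_pending(void)
{
    unsigned long flags;
    bool pending;

    spin_lock_irqsave(&example_event_lock, flags);
    pending = !list_empty(&example_event_list);
    spin_unlock_irqrestore(&example_event_lock, flags);

    return pending;
}

/* Consumer side: sleep until an event is queued, then dequeue it. */
static struct example_fault_event *example_wait_for_event(void)
{
    struct example_fault_event *event = NULL;
    unsigned long flags;

    if (wait_event_interruptible(example_event_wq,
                                 example_event_pending()))
        return NULL;    /* interrupted by a signal */

    spin_lock_irqsave(&example_event_lock, flags);
    if (!list_empty(&example_event_list)) {
        event = list_first_entry(&example_event_list,
                                 struct example_fault_event, head);
        list_del(&event->head);
    }
    spin_unlock_irqrestore(&example_event_lock, flags);

    return event;
}

Whether the lock is strictly required depends on which contexts can reach the list concurrently; the version being removed above wraps every list access in spin_lock_irqsave() because events are posted from the job completion path while the debugfs reader runs in process context.
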
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -125,8 +125,6 @@ static int debug_mem_show(struct seq_file *m, void *v) page = pfn_to_page(PFN_DOWN(map->alloc->pages[data->offset])); mapping = vmap(&page, 1, VM_MAP, prot); - if (!mapping) - goto out; for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) { seq_printf(m, "%016llx:", i + ((map->start_pfn + @@ -162,15 +160,11 @@ static int debug_mem_open(struct inode *i, struct file *file) int ret; ret = seq_open(file, &ops); + if (ret) return ret; mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL); - if (!mem_data) { - ret = -ENOMEM; - goto out; - } - mem_data->kctx = kctx; INIT_LIST_HEAD(&mem_data->mapping_list); @@ -190,11 +184,6 @@ static int debug_mem_open(struct inode *i, struct file *file) continue; mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); - if (!mapping) { - ret = -ENOMEM; - kbase_gpu_vm_unlock(kctx); - goto out; - } mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); mapping->start_pfn = reg->start_pfn; @@ -208,23 +197,6 @@ static int debug_mem_open(struct inode *i, struct file *file) ((struct seq_file *)file->private_data)->private = mem_data; return 0; - -out: - if (mem_data) { - while (!list_empty(&mem_data->mapping_list)) { - struct debug_mem_mapping *mapping; - - mapping = list_first_entry(&mem_data->mapping_list, - struct debug_mem_mapping, node); - kbase_mem_phy_alloc_put(mapping->alloc); - list_del(&mapping->node); - kfree(mapping); - } - fput(kctx_file); - kfree(mem_data); - } - seq_release(i, file); - return ret; } static int debug_mem_release(struct inode *inode, struct file *file) diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h index 4bb8c2c7aec23..0fc5ff95234ef 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_defs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. 
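
The mali_kbase_debug_mem_view.c hunk above drops the NULL checks after kmalloc()/vmap() and the out: unwind path in debug_mem_open(). As a reminder of the shape of that unwind, here is a minimal, self-contained sketch of the allocate-check-rollback pattern; the structure and function names are invented and this is not code from the driver.

#include <linux/errno.h>
#include <linux/list.h>
#include <linux/slab.h>

struct example_mapping {
    struct list_head node;
    unsigned long start_pfn;
};

struct example_data {
    struct list_head mapping_list;
};

/* Build a list of per-mapping records, rolling back on any failure. */
static int example_open(unsigned int nr_mappings, struct example_data **out)
{
    struct example_data *data;
    struct example_mapping *map, *tmp;
    unsigned int i;
    int ret = 0;

    data = kmalloc(sizeof(*data), GFP_KERNEL);
    if (!data)
        return -ENOMEM;
    INIT_LIST_HEAD(&data->mapping_list);

    for (i = 0; i < nr_mappings; i++) {
        map = kmalloc(sizeof(*map), GFP_KERNEL);
        if (!map) {
            ret = -ENOMEM;
            goto out;   /* undo everything done so far */
        }
        map->start_pfn = i;
        list_add(&map->node, &data->mapping_list);
    }

    *out = data;
    return 0;

out:
    list_for_each_entry_safe(map, tmp, &data->mapping_list, node) {
        list_del(&map->node);
        kfree(map);
    }
    kfree(data);
    return ret;
}

Small kmalloc() allocations rarely fail, but they can under memory pressure, so the checks are cheap insurance; the older variant restored by this patch simply omits them.
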
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include @@ -53,8 +53,6 @@ #include "sync.h" #endif /* CONFIG_SYNC */ -#include "mali_kbase_dma_fence.h" - #ifdef CONFIG_DEBUG_FS #include #endif /* CONFIG_DEBUG_FS */ @@ -177,18 +175,18 @@ #define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4) /** Atom has caused us to enter disjoint state */ #define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5) +/* Atom has fail dependency on same-slot dependency */ +#define KBASE_KATOM_FLAG_FAIL_PREV (1<<6) /* Atom blocked on cross-slot dependency */ #define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7) /* Atom has fail dependency on cross-slot dependency */ #define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8) -/* Atom is currently in the list of atoms blocked on cross-slot dependencies */ -#define KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST (1<<9) +/* Atom has been submitted to JSCTX ringbuffers */ +#define KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED (1<<9) /* Atom is currently holding a context reference */ #define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10) -/* Atom requires GPU to be in protected mode */ -#define KBASE_KATOM_FLAG_PROTECTED (1<<11) -/* Atom has been stored in runnable_tree */ -#define KBASE_KATOM_FLAG_JSCTX_IN_TREE (1<<12) +/* Atom requires GPU to be in secure mode */ +#define KBASE_KATOM_FLAG_SECURE (1<<11) /* SW related flags about types of JS_COMMAND action * NOTE: These must be masked off by JS_COMMAND_MASK */ @@ -235,11 +233,11 @@ struct kbase_jd_atom_dependency { * * @return readonly reference to dependent ATOM. */ -static inline const struct kbase_jd_atom * kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) +static inline const struct kbase_jd_atom *const kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) { LOCAL_ASSERT(dep != NULL); - return (const struct kbase_jd_atom *)(dep->atom); + return (const struct kbase_jd_atom * const)(dep->atom); } /** @@ -250,7 +248,7 @@ static inline const struct kbase_jd_atom * kbase_jd_katom_dep_atom(const struct * * @return A dependency type value. */ -static inline u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep) +static inline const u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep) { LOCAL_ASSERT(dep != NULL); @@ -301,15 +299,13 @@ enum kbase_atom_gpu_rb_state { KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB, /* Atom is in slot ringbuffer but is blocked on a previous atom */ KBASE_ATOM_GPU_RB_WAITING_BLOCKED, - /* Atom is in slot ringbuffer but is waiting for proected mode exit */ - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_EXIT, /* Atom is in slot ringbuffer but is waiting for cores to become * available */ KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE, /* Atom is in slot ringbuffer but is blocked on affinity */ KBASE_ATOM_GPU_RB_WAITING_AFFINITY, - /* Atom is in slot ringbuffer but is waiting for protected mode entry */ - KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_ENTRY, + /* Atom is in slot ringbuffer but is waiting for secure mode switch */ + KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE, /* Atom is in slot ringbuffer and ready to run */ KBASE_ATOM_GPU_RB_READY, /* Atom is in slot ringbuffer and has been submitted to the GPU */ @@ -319,23 +315,6 @@ enum kbase_atom_gpu_rb_state { KBASE_ATOM_GPU_RB_RETURN_TO_JS }; -enum kbase_atom_exit_protected_state { - /* - * Starting state: - * Check if a transition out of protected mode is required. 
- */ - KBASE_ATOM_EXIT_PROTECTED_CHECK, - /* Wait for the L2 to become idle in preparation for the reset. */ - KBASE_ATOM_EXIT_PROTECTED_IDLE_L2, - /* Issue the protected reset. */ - KBASE_ATOM_EXIT_PROTECTED_RESET, - /* - * End state; - * Wait for the reset to complete. - */ - KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT, -}; - struct kbase_ext_res { u64 gpu_address; struct kbase_mem_phy_alloc *alloc; @@ -352,13 +331,6 @@ struct kbase_jd_atom { struct list_head dep_head[2]; struct list_head dep_item[2]; const struct kbase_jd_atom_dependency dep[2]; - /* List head used during job dispatch job_done processing - as - * dependencies may not be entirely resolved at this point, we need to - * use a separate list head. */ - struct list_head jd_item; - /* true if atom's jd_item is currently on a list. Prevents atom being - * processed twice. */ - bool in_jd_list; u16 nr_extres; struct kbase_ext_res *extres; @@ -376,59 +348,6 @@ struct kbase_jd_atom { struct sync_fence *fence; struct sync_fence_waiter sync_waiter; #endif /* CONFIG_SYNC */ -#ifdef CONFIG_MALI_DMA_FENCE - struct { - /* This points to the dma-buf fence for this atom. If this is - * NULL then there is no fence for this atom and the other - * fields related to dma_fence may have invalid data. - * - * The context and seqno fields contain the details for this - * fence. - * - * This fence is signaled when the katom is completed, - * regardless of the event_code of the katom (signal also on - * failure). - */ - struct fence *fence; - /* The dma-buf fence context number for this atom. A unique - * context number is allocated to each katom in the context on - * context creation. - */ - unsigned int context; - /* The dma-buf fence sequence number for this atom. This is - * increased every time this katom uses dma-buf fence. - */ - atomic_t seqno; - /* This contains a list of all callbacks set up to wait on - * other fences. This atom must be held back from JS until all - * these callbacks have been called and dep_count have reached - * 0. The initial value of dep_count must be equal to the - * number of callbacks on this list. - * - * This list is protected by jctx.lock. Callbacks are added to - * this list when the atom is built and the wait are set up. - * All the callbacks then stay on the list until all callbacks - * have been called and the atom is queued, or cancelled, and - * then all callbacks are taken off the list and freed. - */ - struct list_head callbacks; - /* Atomic counter of number of outstandind dma-buf fence - * dependencies for this atom. When dep_count reaches 0 the - * atom may be queued. - * - * The special value "-1" may only be set after the count - * reaches 0, while holding jctx.lock. This indicates that the - * atom has been handled, either queued in JS or cancelled. - * - * If anyone but the dma-fence worker sets this to -1 they must - * ensure that any potentially queued worker must have - * completed before allowing the atom to be marked as unused. - * This can be done by flushing the fence work queue: - * kctx->dma_fence.wq. 
- */ - atomic_t dep_count; - } dma_fence; -#endif /* CONFIG_MALI_DMA_FENCE */ /* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */ enum base_jd_event_code event_code; @@ -464,11 +383,6 @@ struct kbase_jd_atom { atomic_t blocked; - /* Pointer to atom that this atom has same-slot dependency on */ - struct kbase_jd_atom *pre_dep; - /* Pointer to atom that has same-slot dependency on this atom */ - struct kbase_jd_atom *post_dep; - /* Pointer to atom that this atom has cross-slot dependency on */ struct kbase_jd_atom *x_pre_dep; /* Pointer to atom that has cross-slot dependency on this atom */ @@ -482,32 +396,11 @@ struct kbase_jd_atom { #ifdef CONFIG_DEBUG_FS struct base_job_fault_event fault_event; #endif - - /* List head used for two different purposes: - * 1. Overflow list for JS ring buffers. If an atom is ready to run, - * but there is no room in the JS ring buffer, then the atom is put - * on the ring buffer's overflow list using this list node. - * 2. List of waiting soft jobs. - */ - struct list_head queue; - - struct kbase_va_region *jit_addr_reg; - - /* If non-zero, this indicates that the atom will fail with the set - * event_code when the atom is processed. */ - enum base_jd_event_code will_fail_event_code; - - enum kbase_atom_exit_protected_state exit_protected_state; - - struct rb_node runnable_tree_node; - - /* 'Age' of atom relative to other atoms in the context. */ - u32 age; }; -static inline bool kbase_jd_katom_is_protected(const struct kbase_jd_atom *katom) +static inline bool kbase_jd_katom_is_secure(const struct kbase_jd_atom *katom) { - return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_PROTECTED); + return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_SECURE); } /* @@ -583,7 +476,6 @@ typedef u32 kbase_as_poke_state; struct kbase_mmu_setup { u64 transtab; u64 memattr; - u64 transcfg; }; /** @@ -602,7 +494,6 @@ struct kbase_as { enum kbase_mmu_fault_type fault_type; u32 fault_status; u64 fault_addr; - u64 fault_extra_addr; struct mutex transaction_mutex; struct kbase_mmu_setup current_setup; @@ -829,36 +720,27 @@ struct kbase_pm_device_data { }; /** - * struct kbase_protected_ops - Platform specific functions for GPU protected - * mode operations - * @protected_mode_enter: Callback to enter protected mode on the GPU - * @protected_mode_reset: Callback to reset the GPU and exit protected mode. - * @protected_mode_supported: Callback to check if protected mode is supported. 
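/*
 * kbase_secure_ops (below) replaces kbase_protected_ops as the table of
 * platform hooks used to switch the GPU's secure mode. A hedged sketch of how
 * a platform integration might populate such a table; the callback bodies are
 * placeholders and are not part of this patch (a real platform would issue
 * its firmware/SMC call here):
 */
static int ex_plat_secure_enable(struct kbase_device *kbdev)
{
	/* platform-specific secure-world call would go here */
	return 0;
}

static int ex_plat_secure_disable(struct kbase_device *kbdev)
{
	return 0;
}

static struct kbase_secure_ops ex_plat_secure_ops = {
	.secure_mode_enable  = ex_plat_secure_enable,
	.secure_mode_disable = ex_plat_secure_disable,
};

/* e.g. wired up during platform init: kbdev->secure_ops = &ex_plat_secure_ops; */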
+ * struct kbase_secure_ops - Platform specific functions for GPU secure mode + * operations + * @secure_mode_enable: Callback to enable secure mode on the GPU + * @secure_mode_disable: Callback to disable secure mode on the GPU */ -struct kbase_protected_ops { - /** - * protected_mode_enter() - Enter protected mode on the GPU - * @kbdev: The kbase device - * - * Return: 0 on success, non-zero on error - */ - int (*protected_mode_enter)(struct kbase_device *kbdev); - +struct kbase_secure_ops { /** - * protected_mode_reset() - Reset the GPU and exit protected mode + * secure_mode_enable() - Enable secure mode on the GPU * @kbdev: The kbase device * * Return: 0 on success, non-zero on error */ - int (*protected_mode_reset)(struct kbase_device *kbdev); + int (*secure_mode_enable)(struct kbase_device *kbdev); /** - * protected_mode_supported() - Check if protected mode is supported + * secure_mode_disable() - Disable secure mode on the GPU * @kbdev: The kbase device * * Return: 0 on success, non-zero on error */ - bool (*protected_mode_supported)(struct kbase_device *kbdev); + int (*secure_mode_disable)(struct kbase_device *kbdev); }; @@ -905,13 +787,13 @@ struct kbase_device { u64 reg_start; size_t reg_size; void __iomem *reg; - struct { int irq; int flags; } irqs[3]; - +#ifdef CONFIG_HAVE_CLK struct clk *clock; +#endif #ifdef CONFIG_REGULATOR struct regulator *regulator; #endif @@ -925,7 +807,7 @@ struct kbase_device { atomic_t serving_gpu_irq; atomic_t serving_mmu_irq; spinlock_t reg_op_lock; -#endif /* CONFIG_MALI_NO_MALI */ +#endif /* CONFIG_MALI_NO_MALI */ struct kbase_pm_device_data pm; struct kbasep_js_device_data js_data; @@ -997,13 +879,16 @@ struct kbase_device { s8 nr_user_address_spaces; /**< Number of address spaces available to user contexts */ /* Structure used for instrumentation and HW counters dumping */ - struct kbase_hwcnt { + struct { /* The lock should be used when accessing any of the following members */ spinlock_t lock; struct kbase_context *kctx; u64 addr; + struct kbase_context *suspended_kctx; + struct kbase_uk_hwcnt_setup suspended_state; + struct kbase_instr_backend backend; } hwcnt; @@ -1019,6 +904,30 @@ struct kbase_device { struct kbase_trace *trace_rbuf; #endif + /* This is used to override the current job scheduler values for + * JS_SCHEDULING_PERIOD_NS + * JS_SOFT_STOP_TICKS + * JS_SOFT_STOP_TICKS_CL + * JS_HARD_STOP_TICKS_SS + * JS_HARD_STOP_TICKS_CL + * JS_HARD_STOP_TICKS_DUMPING + * JS_RESET_TICKS_SS + * JS_RESET_TICKS_CL + * JS_RESET_TICKS_DUMPING. + * + * These values are set via the js_timeouts sysfs file. 
+ */ + u32 js_scheduling_period_ns; + int js_soft_stop_ticks; + int js_soft_stop_ticks_cl; + int js_hard_stop_ticks_ss; + int js_hard_stop_ticks_cl; + int js_hard_stop_ticks_dumping; + int js_reset_ticks_ss; + int js_reset_ticks_cl; + int js_reset_ticks_dumping; + bool js_timeouts_updated; + u32 reset_timeout_ms; struct mutex cacheclean_lock; @@ -1036,12 +945,8 @@ struct kbase_device { unsigned long current_freq; unsigned long current_voltage; #ifdef CONFIG_DEVFREQ_THERMAL -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) - struct devfreq_cooling_device *devfreq_cooling; -#else struct thermal_cooling_device *devfreq_cooling; #endif -#endif #endif struct kbase_ipa_context *ipa_ctx; @@ -1062,17 +967,11 @@ struct kbase_device { /* Root directory for per context entry */ struct dentry *debugfs_ctx_directory; -#ifdef CONFIG_MALI_DEBUG - /* bit for each as, set if there is new data to report */ - u64 debugfs_as_read_bitmap; -#endif /* CONFIG_MALI_DEBUG */ - /* failed job dump, used for separate debug process */ wait_queue_head_t job_fault_wq; wait_queue_head_t job_fault_resume_wq; struct workqueue_struct *job_fault_resume_workq; struct list_head job_fault_event_list; - spinlock_t job_fault_event_lock; struct kbase_context *kctx_fault; #if !MALI_CUSTOMER_RELEASE @@ -1118,41 +1017,24 @@ struct kbase_device { /* defaults for new context created for this device */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) - bool infinite_cache_active_default; -#else u32 infinite_cache_active_default; -#endif size_t mem_pool_max_size_default; /* system coherency mode */ u32 system_coherency; - /* Flag to track when cci snoops have been enabled on the interface */ - bool cci_snoop_enabled; - - /* SMC function IDs to call into Trusted firmware to enable/disable - * cache snooping. Value of 0 indicates that they are not used - */ - u32 snoop_enable_smc; - u32 snoop_disable_smc; - /* Protected operations */ - struct kbase_protected_ops *protected_ops; + /* Secure operations */ + struct kbase_secure_ops *secure_ops; /* - * true when GPU is put into protected mode + * true when GPU is put into secure mode */ - bool protected_mode; + bool secure_mode; /* - * true when GPU is transitioning into or out of protected mode + * true if secure mode is supported */ - bool protected_mode_transition; - - /* - * true if protected mode is supported - */ - bool protected_mode_support; + bool secure_mode_support; #ifdef CONFIG_MALI_DEBUG @@ -1168,26 +1050,46 @@ struct kbase_device { #endif /* Boolean indicating if an IRQ flush during reset is in progress. */ bool irq_reset_flush; +}; - /* list of inited sub systems. Used during terminate/error recovery */ - u32 inited_subsys; +/* JSCTX ringbuffer size must always be a power of 2 */ +#define JSCTX_RB_SIZE 256 +#define JSCTX_RB_MASK (JSCTX_RB_SIZE-1) + +/** + * struct jsctx_rb_entry - Entry in &struct jsctx_rb ring buffer + * @atom_id: Atom ID + */ +struct jsctx_rb_entry { + u16 atom_id; }; /** - * struct jsctx_queue - JS context atom queue - * @runnable_tree: Root of RB-tree containing currently runnable atoms on this - * job slot. - * @x_dep_head: Head item of the linked list of atoms blocked on cross-slot - * dependencies. Atoms on this list will be moved to the - * runnable_tree when the blocking atom completes. + * struct jsctx_rb - JS context atom ring buffer + * @entries: Array of size %JSCTX_RB_SIZE which holds the &struct + * kbase_jd_atom pointers which make up the contents of the ring + * buffer. + * @read_idx: Index into @entries. 
Indicates the next entry in @entries to + * read, and is incremented when pulling an atom, and decremented + * when unpulling. + * HW access lock must be held when accessing. + * @write_idx: Index into @entries. Indicates the next entry to use when + * adding atoms into the ring buffer, and is incremented when + * adding a new atom. + * jctx->lock must be held when accessing. + * @running_idx: Index into @entries. Indicates the last valid entry, and is + * incremented when remving atoms from the ring buffer. + * HW access lock must be held when accessing. * - * runpool_irq.lock must be held when accessing this structure. + * &struct jsctx_rb is a ring buffer of &struct kbase_jd_atom. */ -struct jsctx_queue { - struct rb_root runnable_tree; - struct list_head x_dep_head; -}; +struct jsctx_rb { + struct jsctx_rb_entry entries[JSCTX_RB_SIZE]; + u16 read_idx; /* HW access lock must be held when accessing */ + u16 write_idx; /* jctx->lock must be held when accessing */ + u16 running_idx; /* HW access lock must be held when accessing */ +}; #define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \ (((minor) & 0xFFF) << 8) | \ @@ -1200,12 +1102,10 @@ struct kbase_context { unsigned long api_version; phys_addr_t pgd; struct list_head event_list; - struct list_head event_coalesce_list; struct mutex event_mutex; atomic_t event_closed; struct workqueue_struct *event_workq; atomic_t event_count; - int event_coalesce_count; bool is_compat; @@ -1216,7 +1116,6 @@ struct kbase_context { struct page *aliasing_sink_page; - struct mutex mmu_lock; struct mutex reg_lock; /* To be converted to a rwlock? */ struct rb_root reg_rbtree; /* Red-Black tree of GPU regions (live regions) */ @@ -1233,21 +1132,10 @@ struct kbase_context { struct kbase_mem_pool mem_pool; - struct shrinker reclaim; - struct list_head evict_list; - struct mutex evict_lock; - struct list_head waiting_soft_jobs; - spinlock_t waiting_soft_jobs_lock; #ifdef CONFIG_KDS struct list_head waiting_kds_resource; #endif -#ifdef CONFIG_MALI_DMA_FENCE - struct { - struct list_head waiting_resource; - struct workqueue_struct *wq; - } dma_fence; -#endif /* CONFIG_MALI_DMA_FENCE */ /** This is effectively part of the Run Pool, because it only has a valid * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in * @@ -1269,8 +1157,6 @@ struct kbase_context { * All other flags must be added there */ spinlock_t mm_update_lock; struct mm_struct *process_mm; - /* End of the SAME_VA zone */ - u64 same_va_end; #ifdef CONFIG_MALI_TRACE_TIMELINE struct kbase_trace_kctx_timeline timeline; @@ -1296,7 +1182,7 @@ struct kbase_context { #endif /* CONFIG_DEBUG_FS */ - struct jsctx_queue jsctx_queue + struct jsctx_rb jsctx_rb [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; /* Number of atoms currently pulled from this context */ @@ -1307,14 +1193,13 @@ struct kbase_context { bool pulled; /* true if infinite cache is to be enabled for new allocations. Existing * allocations will not change. 
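/*
 * jsctx_rb above relies on JSCTX_RB_SIZE being a power of two so that its
 * free-running u16 indices can be reduced with JSCTX_RB_MASK instead of a
 * modulo. A standalone sketch of that indexing scheme (illustrative only; it
 * ignores the driver's locking and the separate running_idx):
 */
#include <stdint.h>

#define EX_RB_SIZE 256u                 /* must be a power of two */
#define EX_RB_MASK (EX_RB_SIZE - 1u)

struct ex_rb {
	uint16_t entries[EX_RB_SIZE];
	uint16_t read_idx;              /* incremented when pulling */
	uint16_t write_idx;             /* incremented when adding */
};

static int ex_rb_add(struct ex_rb *rb, uint16_t atom_id)
{
	if ((uint16_t)(rb->write_idx - rb->read_idx) >= EX_RB_SIZE)
		return -1;                              /* full */
	rb->entries[rb->write_idx & EX_RB_MASK] = atom_id;
	rb->write_idx++;                                /* wraps naturally */
	return 0;
}

static int ex_rb_pull(struct ex_rb *rb, uint16_t *atom_id)
{
	if (rb->read_idx == rb->write_idx)
		return -1;                              /* empty */
	*atom_id = rb->entries[rb->read_idx & EX_RB_MASK];
	rb->read_idx++;
	return 0;
}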
bool stored as a u32 per Linux API */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) - bool infinite_cache_active; -#else u32 infinite_cache_active; -#endif /* Bitmask of slots that can be pulled from */ u32 slots_pullable; + /* true if address space assignment is pending */ + bool as_pending; + /* Backend specific data */ struct kbase_context_backend backend; @@ -1335,52 +1220,6 @@ struct kbase_context { /* true if context is counted in kbdev->js_data.nr_contexts_runnable */ bool ctx_runnable_ref; - - /* Waiting soft-jobs will fail when this timer expires */ - struct timer_list soft_job_timeout; - - /* JIT allocation management */ - struct kbase_va_region *jit_alloc[256]; - struct list_head jit_active_head; - struct list_head jit_pool_head; - struct list_head jit_destroy_head; - struct mutex jit_lock; - struct work_struct jit_work; - - /* External sticky resource management */ - struct list_head ext_res_meta_head; - - /* Used to record that a drain was requested from atomic context */ - atomic_t drain_pending; - - /* Current age count, used to determine age for newly submitted atoms */ - u32 age_count; -}; - -/** - * struct kbase_ctx_ext_res_meta - Structure which binds an external resource - * to a @kbase_context. - * @ext_res_node: List head for adding the metadata to a - * @kbase_context. - * @alloc: The physical memory allocation structure - * which is mapped. - * @gpu_addr: The GPU virtual address the resource is - * mapped to. - * - * External resources can be mapped into multiple contexts as well as the same - * context multiple times. - * As kbase_va_region itself isn't refcounted we can't attach our extra - * information to it as it could be removed under our feet leaving external - * resources pinned. - * This metadata structure binds a single external resource to a single - * context, ensuring that per context mapping is tracked separately so it can - * be overridden when needed and abuses by the application (freeing the resource - * multiple times) don't effect the refcount of the physical allocation. - */ -struct kbase_ctx_ext_res_meta { - struct list_head ext_res_node; - struct kbase_mem_phy_alloc *alloc; - u64 gpu_addr; }; enum kbase_reg_access_type { @@ -1410,7 +1249,7 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) } /* Conversion helpers for setting up high resolution timers */ -#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U)) +#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime((x)*1000000U)) #define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x)) /* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */ @@ -1421,29 +1260,4 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev) /* Maximum number of times a job can be replayed */ #define BASEP_JD_REPLAY_LIMIT 15 -/* JobDescriptorHeader - taken from the architecture specifications, the layout - * is currently identical for all GPU archs. 
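/*
 * Note on HR_TIMER_DELAY_MSEC above: with the (u64) cast removed, the
 * multiplication (x)*1000000U is performed in 32-bit arithmetic whenever x is
 * a 32-bit type, so delays above roughly 4294 ms wrap before ns_to_ktime()
 * ever sees them. A standalone illustration of the difference:
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t msec = 5000;                       /* 5 seconds */
	uint32_t narrow = msec * 1000000U;          /* wraps in 32 bits */
	uint64_t wide = (uint64_t)msec * 1000000U;  /* widened before multiply */

	printf("narrow=%u wide=%llu\n", (unsigned)narrow,
	       (unsigned long long)wide);
	return 0;
}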
*/ -struct job_descriptor_header { - u32 exception_status; - u32 first_incomplete_task; - u64 fault_pointer; - u8 job_descriptor_size : 1; - u8 job_type : 7; - u8 job_barrier : 1; - u8 _reserved_01 : 1; - u8 _reserved_1 : 1; - u8 _reserved_02 : 1; - u8 _reserved_03 : 1; - u8 _reserved_2 : 1; - u8 _reserved_04 : 1; - u8 _reserved_05 : 1; - u16 job_index; - u16 job_dependency_index_1; - u16 job_dependency_index_2; - union { - u64 _64; - u32 _32; - } next_job; -}; - #endif /* _KBASE_DEFS_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_device.c b/drivers/gpu/arm/midgard/mali_kbase_device.c index 62ab0caf9858a..c22e099db6f97 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_device.c +++ b/drivers/gpu/arm/midgard/mali_kbase_device.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,7 +30,6 @@ #include #include -#include #include #include @@ -146,32 +145,8 @@ static void kbase_device_all_as_term(struct kbase_device *kbdev) int kbase_device_init(struct kbase_device * const kbdev) { int i, err; -#ifdef CONFIG_ARM64 - struct device_node *np = NULL; -#endif /* CONFIG_ARM64 */ spin_lock_init(&kbdev->mmu_mask_change); -#ifdef CONFIG_ARM64 - kbdev->cci_snoop_enabled = false; - np = kbdev->dev->of_node; - if (np != NULL) { - if (of_property_read_u32(np, "snoop_enable_smc", - &kbdev->snoop_enable_smc)) - kbdev->snoop_enable_smc = 0; - if (of_property_read_u32(np, "snoop_disable_smc", - &kbdev->snoop_disable_smc)) - kbdev->snoop_disable_smc = 0; - /* Either both or none of the calls should be provided. */ - if (!((kbdev->snoop_disable_smc == 0 - && kbdev->snoop_enable_smc == 0) - || (kbdev->snoop_disable_smc != 0 - && kbdev->snoop_enable_smc != 0))) { - WARN_ON(1); - err = -EINVAL; - goto fail; - } - } -#endif /* CONFIG_ARM64 */ /* Get the list of workarounds for issues on the current HW * (identified by the GPU_ID register) */ @@ -245,11 +220,7 @@ int kbase_device_init(struct kbase_device * const kbdev) kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS; -#ifdef CONFIG_MALI_GPU_MMU_AARCH64 - kbdev->mmu_mode = kbase_mmu_mode_get_aarch64(); -#else kbdev->mmu_mode = kbase_mmu_mode_get_lpae(); -#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ #ifdef CONFIG_MALI_DEBUG init_waitqueue_head(&kbdev->driver_inactive_wait); diff --git a/drivers/gpu/arm/midgard/mali_kbase_event.c b/drivers/gpu/arm/midgard/mali_kbase_event.c index bf8c304610eb3..0d0c5258aaa4a 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_event.c +++ b/drivers/gpu/arm/midgard/mali_kbase_event.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,7 +19,10 @@ #include #include + +#if defined(CONFIG_MALI_MIPE_ENABLED) #include +#endif static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom) { @@ -35,8 +38,10 @@ static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, stru KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight)); +#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_nret_atom_ctx(katom, kctx); kbase_tlstream_tl_del_atom(katom); +#endif katom->status = KBASE_JD_ATOM_STATE_UNUSED; @@ -142,29 +147,6 @@ static void kbase_event_process_noreport(struct kbase_context *kctx, } } -/** - * kbase_event_coalesce - Move pending events to the main event list - * @kctx: Context pointer - * - * kctx->event_list and kctx->event_coalesce_count must be protected - * by a lock unless this is the last thread using them - * (and we're about to terminate the lock). - * - * Return: The number of pending events moved to the main event list - */ -static int kbase_event_coalesce(struct kbase_context *kctx) -{ - const int event_count = kctx->event_coalesce_count; - - /* Join the list of pending events onto the tail of the main list - and reset it */ - list_splice_tail_init(&kctx->event_coalesce_list, &kctx->event_list); - kctx->event_coalesce_count = 0; - - /* Return the number of events moved */ - return event_count; -} - void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) { if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) { @@ -181,24 +163,12 @@ void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) return; } - if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) { - /* Don't report the event until other event(s) have completed */ - mutex_lock(&ctx->event_mutex); - list_add_tail(&atom->dep_item[0], &ctx->event_coalesce_list); - ++ctx->event_coalesce_count; - mutex_unlock(&ctx->event_mutex); - } else { - /* Report the event and any pending events now */ - int event_count = 1; - - mutex_lock(&ctx->event_mutex); - event_count += kbase_event_coalesce(ctx); - list_add_tail(&atom->dep_item[0], &ctx->event_list); - atomic_add(event_count, &ctx->event_count); - mutex_unlock(&ctx->event_mutex); + mutex_lock(&ctx->event_mutex); + atomic_inc(&ctx->event_count); + list_add_tail(&atom->dep_item[0], &ctx->event_list); + mutex_unlock(&ctx->event_mutex); - kbase_event_wakeup(ctx); - } + kbase_event_wakeup(ctx); } KBASE_EXPORT_TEST_API(kbase_event_post); @@ -215,10 +185,8 @@ int kbase_event_init(struct kbase_context *kctx) KBASE_DEBUG_ASSERT(kctx); INIT_LIST_HEAD(&kctx->event_list); - INIT_LIST_HEAD(&kctx->event_coalesce_list); mutex_init(&kctx->event_mutex); atomic_set(&kctx->event_count, 0); - kctx->event_coalesce_count = 0; atomic_set(&kctx->event_closed, false); kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1); @@ -232,8 +200,6 @@ KBASE_EXPORT_TEST_API(kbase_event_init); void kbase_event_cleanup(struct kbase_context *kctx) { - int event_count; - KBASE_DEBUG_ASSERT(kctx); KBASE_DEBUG_ASSERT(kctx->event_workq); @@ -246,9 +212,6 @@ void kbase_event_cleanup(struct kbase_context *kctx) * Note: use of kctx->event_list without a lock is safe because this must be the last * thread using it (because we're about to terminate the lock) */ - event_count = kbase_event_coalesce(kctx); - atomic_add(event_count, &kctx->event_count); - 
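/*
 * The removed kbase_event_coalesce() above is an instance of a common kernel
 * pattern: a private "pending" list is spliced onto the main list in one step
 * while holding the lock that guards both, and the pending head is reset so
 * it can be reused immediately. A minimal sketch with hypothetical names:
 */
#include <linux/list.h>
#include <linux/mutex.h>

struct ex_event_queue {
	struct mutex lock;
	struct list_head main_list;
	struct list_head pending_list;
	int pending_count;
};

static int ex_flush_pending(struct ex_event_queue *q)
{
	int moved;

	mutex_lock(&q->lock);
	moved = q->pending_count;
	/* Move every pending node to the tail of main_list and re-initialise
	 * the pending list head in a single operation. */
	list_splice_tail_init(&q->pending_list, &q->main_list);
	q->pending_count = 0;
	mutex_unlock(&q->lock);

	return moved;
}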
while (!list_empty(&kctx->event_list)) { struct base_jd_event_v2 event; diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c index 4af3e4815e953..a2174b24ac3cd 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c +++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -20,6 +20,7 @@ #include "mali_kbase_mem_linux.h" #include "mali_kbase_gator_api.h" #include "mali_kbase_gator_hwcnt_names.h" +#include "mali_kbase_instr.h" #define MALI_MAX_CORES_PER_GROUP 4 #define MALI_MAX_NUM_BLOCKS_PER_GROUP 8 @@ -27,23 +28,18 @@ #define MALI_BYTES_PER_COUNTER 4 struct kbase_gator_hwcnt_handles { - struct kbase_device *kbdev; - struct kbase_vinstr_client *vinstr_cli; - void *vinstr_buffer; - struct work_struct dump_work; - int dump_complete; - spinlock_t dump_lock; + struct kbase_device *kbdev; + struct kbase_context *kctx; + u64 hwcnt_gpu_va; + void *hwcnt_cpu_va; + struct kbase_vmap_struct hwcnt_map; }; -static void dump_worker(struct work_struct *work); - const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters) { + uint32_t gpu_id; const char * const *hardware_counters; struct kbase_device *kbdev; - uint32_t gpu_id; - uint32_t product_id; - uint32_t count; if (!total_counters) return NULL; @@ -54,78 +50,58 @@ const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters) return NULL; gpu_id = kbdev->gpu_props.props.core_props.product_id; - product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; - product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; - - if (GPU_ID_IS_NEW_FORMAT(product_id)) { - switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { - case GPU_ID2_PRODUCT_TMIX: - hardware_counters = hardware_counters_mali_tMIx; - count = ARRAY_SIZE(hardware_counters_mali_tMIx); - break; - default: - hardware_counters = NULL; - count = 0; - dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n", - gpu_id); - break; - } - } else { - switch (gpu_id) { - /* If we are using a Mali-T60x device */ - case GPU_ID_PI_T60X: - hardware_counters = hardware_counters_mali_t60x; - count = ARRAY_SIZE(hardware_counters_mali_t60x); - break; - /* If we are using a Mali-T62x device */ - case GPU_ID_PI_T62X: - hardware_counters = hardware_counters_mali_t62x; - count = ARRAY_SIZE(hardware_counters_mali_t62x); - break; - /* If we are using a Mali-T72x device */ - case GPU_ID_PI_T72X: - hardware_counters = hardware_counters_mali_t72x; - count = ARRAY_SIZE(hardware_counters_mali_t72x); - break; - /* If we are using a Mali-T76x device */ - case GPU_ID_PI_T76X: - hardware_counters = hardware_counters_mali_t76x; - count = ARRAY_SIZE(hardware_counters_mali_t76x); - break; - /* If we are using a Mali-T82x device */ - case GPU_ID_PI_T82X: - hardware_counters = hardware_counters_mali_t82x; - count = ARRAY_SIZE(hardware_counters_mali_t82x); - break; - /* If we are using a Mali-T83x device */ - case GPU_ID_PI_T83X: - hardware_counters = hardware_counters_mali_t83x; - count = ARRAY_SIZE(hardware_counters_mali_t83x); - break; - /* If we are using a Mali-T86x device */ - case GPU_ID_PI_T86X: - hardware_counters = hardware_counters_mali_t86x; - count = ARRAY_SIZE(hardware_counters_mali_t86x); - break; - /* If we are using a Mali-T88x device */ - case GPU_ID_PI_TFRX: - hardware_counters = 
hardware_counters_mali_t88x; - count = ARRAY_SIZE(hardware_counters_mali_t88x); - break; - default: - hardware_counters = NULL; - count = 0; - dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n", - gpu_id); - break; - } + + switch (gpu_id) { + /* If we are using a Mali-T60x device */ + case GPU_ID_PI_T60X: + hardware_counters = hardware_counters_mali_t60x; + *total_counters = ARRAY_SIZE(hardware_counters_mali_t60x); + break; + /* If we are using a Mali-T62x device */ + case GPU_ID_PI_T62X: + hardware_counters = hardware_counters_mali_t62x; + *total_counters = ARRAY_SIZE(hardware_counters_mali_t62x); + break; + /* If we are using a Mali-T72x device */ + case GPU_ID_PI_T72X: + hardware_counters = hardware_counters_mali_t72x; + *total_counters = ARRAY_SIZE(hardware_counters_mali_t72x); + break; + /* If we are using a Mali-T76x device */ + case GPU_ID_PI_T76X: + hardware_counters = hardware_counters_mali_t76x; + *total_counters = ARRAY_SIZE(hardware_counters_mali_t76x); + break; + /* If we are using a Mali-T82x device */ + case GPU_ID_PI_T82X: + hardware_counters = hardware_counters_mali_t82x; + *total_counters = ARRAY_SIZE(hardware_counters_mali_t82x); + break; + /* If we are using a Mali-T83x device */ + case GPU_ID_PI_T83X: + hardware_counters = hardware_counters_mali_t83x; + *total_counters = ARRAY_SIZE(hardware_counters_mali_t83x); + break; + /* If we are using a Mali-T86x device */ + case GPU_ID_PI_T86X: + hardware_counters = hardware_counters_mali_t86x; + *total_counters = ARRAY_SIZE(hardware_counters_mali_t86x); + break; + /* If we are using a Mali-T88x device */ + case GPU_ID_PI_TFRX: + hardware_counters = hardware_counters_mali_t88x; + *total_counters = ARRAY_SIZE(hardware_counters_mali_t88x); + break; + default: + hardware_counters = NULL; + *total_counters = 0; + dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n", gpu_id); + break; } /* Release the kbdev reference. */ kbase_release_device(kbdev); - *total_counters = count; - /* If we return a string array take a reference on the module (or fail). 
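/*
 * Each branch of the switch above pairs a static table of counter-name
 * strings with its element count via ARRAY_SIZE. A standalone sketch of the
 * same table/count idiom; the table contents and product id are made up:
 */
#include <stdio.h>

#define EX_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const char * const ex_counters_modelA[] = {
	"EX_GPU_ACTIVE",
	"EX_JS0_JOBS",
	"EX_L2_READ_HITS",
};

static const char * const *ex_lookup(unsigned int gpu_id, unsigned int *count)
{
	switch (gpu_id) {
	case 0x6956:                            /* hypothetical product id */
		*count = EX_ARRAY_SIZE(ex_counters_modelA);
		return ex_counters_modelA;
	default:
		*count = 0;
		return NULL;
	}
}

int main(void)
{
	unsigned int n;
	const char * const *names = ex_lookup(0x6956, &n);

	printf("%u counters, first=%s\n", n, names ? names[0] : "none");
	return 0;
}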
*/ if (hardware_counters && !try_module_get(THIS_MODULE)) return NULL; @@ -144,8 +120,13 @@ KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names); struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info) { struct kbase_gator_hwcnt_handles *hand; - struct kbase_uk_hwcnt_reader_setup setup; + struct kbase_uk_hwcnt_setup setup; + int err; uint32_t dump_size = 0, i = 0; + struct kbase_va_region *reg; + u64 flags; + u64 nr_pages; + u16 va_alignment = 0; if (!in_out_info) return NULL; @@ -154,19 +135,15 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn if (!hand) return NULL; - INIT_WORK(&hand->dump_work, dump_worker); - spin_lock_init(&hand->dump_lock); - /* Get the first device */ hand->kbdev = kbase_find_device(-1); if (!hand->kbdev) goto free_hand; - dump_size = kbase_vinstr_dump_size(hand->kbdev); - hand->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL); - if (!hand->vinstr_buffer) + /* Create a kbase_context */ + hand->kctx = kbase_create_context(hand->kbdev, true); + if (!hand->kctx) goto release_device; - in_out_info->kernel_dump_buffer = hand->vinstr_buffer; in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores; in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups; @@ -183,7 +160,7 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn in_out_info->nr_core_groups, GFP_KERNEL); if (!in_out_info->hwc_layout) - goto free_vinstr_buffer; + goto destroy_context; dump_size = in_out_info->nr_core_groups * MALI_MAX_NUM_BLOCKS_PER_GROUP * @@ -212,23 +189,23 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn } /* If we are using any other device */ } else { - uint32_t nr_l2, nr_sc_bits, j; + uint32_t nr_l2, nr_sc, j; uint64_t core_mask; nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices; core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask; - nr_sc_bits = fls64(core_mask); + nr_sc = hand->kbdev->gpu_props.props.coherency_info.group[0].num_cores; /* The job manager and tiler sets of counters * are always present */ - in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc_bits + nr_l2), GFP_KERNEL); + in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc + nr_l2), GFP_KERNEL); if (!in_out_info->hwc_layout) - goto free_vinstr_buffer; + goto destroy_context; - dump_size = (2 + nr_sc_bits + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER; + dump_size = (2 + nr_sc + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER; in_out_info->hwc_layout[i++] = JM_BLOCK; in_out_info->hwc_layout[i++] = TILER_BLOCK; @@ -246,32 +223,58 @@ struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcn } in_out_info->nr_hwc_blocks = i; + in_out_info->size = dump_size; + flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR; + nr_pages = PFN_UP(dump_size); + reg = kbase_mem_alloc(hand->kctx, nr_pages, nr_pages, 0, + &flags, &hand->hwcnt_gpu_va, &va_alignment); + if (!reg) + goto free_layout; + + hand->hwcnt_cpu_va = kbase_vmap(hand->kctx, hand->hwcnt_gpu_va, + dump_size, &hand->hwcnt_map); + + if (!hand->hwcnt_cpu_va) + goto free_buffer; + + in_out_info->kernel_dump_buffer = hand->hwcnt_cpu_va; + memset(in_out_info->kernel_dump_buffer, 0, nr_pages * PAGE_SIZE); + + /*setup.dump_buffer = (uintptr_t)in_out_info->kernel_dump_buffer;*/ + setup.dump_buffer = hand->hwcnt_gpu_va; setup.jm_bm = in_out_info->bitmask[0]; setup.tiler_bm = in_out_info->bitmask[1]; 
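/*
 * The allocation above sizes the dump buffer with PFN_UP(dump_size), i.e. the
 * byte count rounded up to whole pages. A standalone sketch of that rounding;
 * the page-size constants are illustrative (the kernel macro lives in
 * linux/pfn.h and uses PAGE_SIZE/PAGE_SHIFT):
 */
#include <stddef.h>
#include <stdio.h>

#define EX_PAGE_SHIFT 12
#define EX_PAGE_SIZE  (1UL << EX_PAGE_SHIFT)
#define EX_PFN_UP(x)  (((x) + EX_PAGE_SIZE - 1) >> EX_PAGE_SHIFT)

int main(void)
{
	size_t dump_size = 9000;        /* hypothetical dump size in bytes */

	printf("%zu bytes -> %lu pages\n", dump_size,
	       (unsigned long)EX_PFN_UP(dump_size));  /* 9000 -> 3 pages */
	return 0;
}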
setup.shader_bm = in_out_info->bitmask[2]; setup.mmu_l2_bm = in_out_info->bitmask[3]; - hand->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(hand->kbdev->vinstr_ctx, - &setup, hand->vinstr_buffer); - if (!hand->vinstr_cli) { - dev_err(hand->kbdev->dev, "Failed to register gator with vinstr core"); - goto free_layout; - } + + err = kbase_instr_hwcnt_enable(hand->kctx, &setup); + if (err) + goto free_unmap; + + kbase_instr_hwcnt_clear(hand->kctx); return hand; +free_unmap: + kbase_vunmap(hand->kctx, &hand->hwcnt_map); + +free_buffer: + kbase_mem_free(hand->kctx, hand->hwcnt_gpu_va); + free_layout: kfree(in_out_info->hwc_layout); -free_vinstr_buffer: - kfree(hand->vinstr_buffer); +destroy_context: + kbase_destroy_context(hand->kctx); release_device: kbase_release_device(hand->kbdev); free_hand: kfree(hand); + return NULL; } KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init); @@ -282,39 +285,27 @@ void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct k kfree(in_out_info->hwc_layout); if (opaque_handles) { - cancel_work_sync(&opaque_handles->dump_work); - kbase_vinstr_detach_client(opaque_handles->vinstr_cli); - kfree(opaque_handles->vinstr_buffer); + kbase_instr_hwcnt_disable(opaque_handles->kctx); + kbase_vunmap(opaque_handles->kctx, &opaque_handles->hwcnt_map); + kbase_mem_free(opaque_handles->kctx, opaque_handles->hwcnt_gpu_va); + kbase_destroy_context(opaque_handles->kctx); kbase_release_device(opaque_handles->kbdev); kfree(opaque_handles); } } KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term); -static void dump_worker(struct work_struct *work) -{ - struct kbase_gator_hwcnt_handles *hand; - - hand = container_of(work, struct kbase_gator_hwcnt_handles, dump_work); - if (!kbase_vinstr_hwc_dump(hand->vinstr_cli, - BASE_HWCNT_READER_EVENT_MANUAL)) { - spin_lock_bh(&hand->dump_lock); - hand->dump_complete = 1; - spin_unlock_bh(&hand->dump_lock); - } else { - schedule_work(&hand->dump_work); - } -} - uint32_t kbase_gator_instr_hwcnt_dump_complete( struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success) { + bool ret_res, success_res; if (opaque_handles && success) { - *success = opaque_handles->dump_complete; - opaque_handles->dump_complete = 0; - return *success; + ret_res = kbase_instr_hwcnt_dump_complete(opaque_handles->kctx, + &success_res); + *success = (uint32_t)success_res; + return (uint32_t)(ret_res != 0); } return 0; } @@ -323,7 +314,9 @@ KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete); uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles) { if (opaque_handles) - schedule_work(&opaque_handles->dump_work); + return (kbase_instr_hwcnt_request_dump( + opaque_handles->kctx) == 0); + return 0; } KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq); diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h index c247dd698e19d..eb76f01b0fda4 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h +++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
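/*
 * The error paths above (free_unmap, free_buffer, free_layout, ...) use the
 * usual kernel idiom of stacked goto labels that undo initialisation in
 * reverse order. A minimal sketch of the idiom with hypothetical resources:
 */
#include <linux/slab.h>

static int ex_setup(void **a_out, void **b_out)
{
	void *a, *b;
	int err = -ENOMEM;

	a = kzalloc(64, GFP_KERNEL);
	if (!a)
		goto fail_a;

	b = kzalloc(64, GFP_KERNEL);
	if (!b)
		goto fail_b;

	*a_out = a;
	*b_out = b;
	return 0;

fail_b:
	kfree(a);        /* undo only what was already set up */
fail_a:
	return err;
}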
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -2156,8 +2156,4 @@ static const char * const hardware_counters_mali_t88x[] = { "T88x_L2_REPLAY_FULL" }; -#include "mali_kbase_gator_hwcnt_names_tmix.h" - - - #endif diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h index a962ecb3f9c63..dc8af2d6e7946 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h +++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -77,9 +77,8 @@ /* Helper macro to create a complete GPU_ID (new format) */ #define GPU_ID2_MAKE(arch_major, arch_minor, arch_rev, product_major, \ version_major, version_minor, version_status) \ - (GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev, \ - product_major) | \ - GPU_ID2_VERSION_MAKE(version_major, version_minor, \ + (GPU_ID2_PRODUCT_MAKE(arch_major, arch_minor, arch_rev) | \ + GPU_ID2_VERSION_MAKE(version_major, version_minor, \ version_status)) /* Helper macro to create a partial GPU_ID (new format) that identifies @@ -95,7 +94,6 @@ (((product_id) << GPU_ID2_PRODUCT_MAJOR_SHIFT) & \ GPU_ID2_PRODUCT_MODEL) -#define GPU_ID2_PRODUCT_TMIX GPU_ID2_MODEL_MAKE(6, 0) /* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */ #define GPU_ID_S_15DEV0 0x1 diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c index 6df0a1cb1264a..82f4c36d509ef 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -15,7 +15,7 @@ -#include +#include #ifdef CONFIG_DEBUG_FS /** Show callback for the @c gpu_memory debugfs file. diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h index 7045693eb9109..3cf30a4e767ec 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2014, 2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved. 
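/*
 * The GPU_ID2_* helpers above build a packed GPU ID out of bit-fields. A
 * standalone sketch of composing and decomposing such a packed value; the
 * field positions below are invented for illustration and are not the real
 * register layout:
 */
#include <stdint.h>
#include <stdio.h>

#define EX_VERSION_STATUS_SHIFT 0
#define EX_VERSION_MINOR_SHIFT  4
#define EX_VERSION_MAJOR_SHIFT  12
#define EX_PRODUCT_MAJOR_SHIFT  16

#define EX_ID_MAKE(product, vmaj, vmin, vstat)              \
	(((uint32_t)(product) << EX_PRODUCT_MAJOR_SHIFT) |  \
	 ((uint32_t)(vmaj)    << EX_VERSION_MAJOR_SHIFT) |  \
	 ((uint32_t)(vmin)    << EX_VERSION_MINOR_SHIFT) |  \
	 ((uint32_t)(vstat)   << EX_VERSION_STATUS_SHIFT))

int main(void)
{
	uint32_t id = EX_ID_MAKE(6, 0, 10, 1);

	printf("id=0x%08x product=%u\n", (unsigned)id,
	       (unsigned)(id >> EX_PRODUCT_MAJOR_SHIFT));
	return 0;
}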
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,9 +23,10 @@ * */ -#ifndef _KBASE_GPU_MEMORY_DEBUGFS_H -#define _KBASE_GPU_MEMORY_DEBUGFS_H +#ifndef _KBASE_GPU_MEMORY_H +#define _KBASE_GPU_MEMORY_H +#include #include #include @@ -34,4 +35,4 @@ */ void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev); -#endif /*_KBASE_GPU_MEMORY_DEBUGFS_H*/ +#endif /*_KBASE_GPU_MEMORY_H*/ diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h index f42e91b6daa18..781375a9a97f5 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h +++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.c b/drivers/gpu/arm/midgard/mali_kbase_hw.c index de2461fb8de42..f2f93de9d2e81 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_hw.c +++ b/drivers/gpu/arm/midgard/mali_kbase_hw.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -37,16 +37,6 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID; product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT; - if (GPU_ID_IS_NEW_FORMAT(product_id)) { - switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { - case GPU_ID2_PRODUCT_TMIX: - features = base_hw_features_tMIx; - break; - default: - features = base_hw_features_generic; - break; - } - } else { switch (product_id) { case GPU_ID_PI_TFRX: /* FALLTHROUGH */ @@ -75,7 +65,7 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev) features = base_hw_features_generic; break; } - } + for (; *features != BASE_HW_FEATURE_END; features++) set_bit(*features, &kbdev->hw_features_mask[0]); @@ -94,25 +84,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) impl_tech = kbdev->gpu_props.props.thread_props.impl_tech; if (impl_tech != IMPLEMENTATION_MODEL) { - if (GPU_ID_IS_NEW_FORMAT(product_id)) { - switch (gpu_id) { - case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 1): - issues = base_hw_issues_tMIx_r0p0_05dev0; - break; - case GPU_ID2_MAKE(6, 0, 10, 0, 0, 0, 2): - issues = base_hw_issues_tMIx_r0p0; - break; - default: - if ((gpu_id & GPU_ID2_PRODUCT_MODEL) == - GPU_ID2_PRODUCT_TMIX) { - issues = base_hw_issues_tMIx_r0p0; - } else { - dev_err(kbdev->dev, - "Unknown GPU ID %x", gpu_id); - return -EINVAL; - } - } - } else { switch (gpu_id) { case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0): issues = base_hw_issues_t60x_r0p0_15dev0; @@ -203,24 +174,11 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) issues = base_hw_issues_t82x_r1p0; break; default: - dev_err(kbdev->dev, - "Unknown GPU ID %x", gpu_id); + dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); return -EINVAL; } - } } else { /* Software model */ - if (GPU_ID_IS_NEW_FORMAT(product_id)) { - switch (gpu_id & GPU_ID2_PRODUCT_MODEL) { - case GPU_ID2_PRODUCT_TMIX: - issues = base_hw_issues_model_tMIx; - break; - default: - dev_err(kbdev->dev, - "Unknown 
GPU ID %x", gpu_id); - return -EINVAL; - } - } else { switch (product_id) { case GPU_ID_PI_T60X: issues = base_hw_issues_model_t60x; @@ -251,7 +209,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev) gpu_id); return -EINVAL; } - } } dev_info(kbdev->dev, "GPU identified as 0x%04x r%dp%d status %d", (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, (gpu_id & GPU_ID_VERSION_STATUS) >> GPU_ID_VERSION_STATUS_SHIFT); diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h index abe66078029f7..2efa293088a14 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h +++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -71,7 +71,9 @@ void kbase_backend_release_free_address_space(struct kbase_device *kbdev, * * kbase_gpu_next_job() will pull atoms from the active context. * - * Return: true if successful, false if ASID not assigned. + * Return: true if successful, false if ASID not assigned. If kctx->as_pending + * is true then ASID assignment will complete at some point in the + * future and will re-start scheduling, otherwise no ASIDs are available */ bool kbase_backend_use_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, @@ -211,15 +213,6 @@ int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js); */ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev); -/** - * kbase_backend_timeouts_changed() - Job Scheduler timeouts have changed. - * @kbdev: Device pointer - * - * Perform any required backend-specific actions (eg updating timeouts of - * currently running atoms). - */ -void kbase_backend_timeouts_changed(struct kbase_device *kbdev); - /** * kbase_backend_slot_free() - Return the number of jobs that can be currently * submitted to slot @js. @@ -326,28 +319,6 @@ bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev); * signalled to know when the reset has completed. */ void kbase_reset_gpu_locked(struct kbase_device *kbdev); - -/** - * kbase_reset_gpu_silent - Reset the GPU silently - * @kbdev: Device pointer - * - * Reset the GPU without trying to cancel jobs and don't emit messages into - * the kernel log while doing the reset. - * - * This function should be used in cases where we are doing a controlled reset - * of the GPU as part of normal processing (e.g. exiting protected mode) where - * the driver will have ensured the scheduler has been idled and all other - * users of the GPU (e.g. instrumentation) have been suspended. - */ -void kbase_reset_gpu_silent(struct kbase_device *kbdev); - -/** - * kbase_reset_gpu_active - Reports if the GPU is being reset - * @kbdev: Device pointer - * - * Return: True if the GPU is in the process of being reset. - */ -bool kbase_reset_gpu_active(struct kbase_device *kbdev); #endif /** diff --git a/drivers/gpu/arm/midgard/mali_kbase_instr.c b/drivers/gpu/arm/midgard/mali_kbase_instr.c new file mode 100644 index 0000000000000..fda317b90176e --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_instr.c @@ -0,0 +1,129 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Base kernel instrumentation APIs. + */ + +#include +#include + +void kbase_instr_hwcnt_suspend(struct kbase_device *kbdev) +{ + struct kbase_context *kctx; + + KBASE_DEBUG_ASSERT(kbdev); + KBASE_DEBUG_ASSERT(!kbdev->hwcnt.suspended_kctx); + + kctx = kbdev->hwcnt.kctx; + kbdev->hwcnt.suspended_kctx = kctx; + + /* Relevant state was saved into hwcnt.suspended_state when enabling the + * counters */ + + if (kctx) { + KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.flags & + KBASE_CTX_FLAG_PRIVILEGED); + kbase_instr_hwcnt_disable(kctx); + } +} + +void kbase_instr_hwcnt_resume(struct kbase_device *kbdev) +{ + struct kbase_context *kctx; + + KBASE_DEBUG_ASSERT(kbdev); + + kctx = kbdev->hwcnt.suspended_kctx; + kbdev->hwcnt.suspended_kctx = NULL; + + if (kctx) { + int err; + + err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, + &kbdev->hwcnt.suspended_state); + WARN(err, "Failed to restore instrumented hardware counters on resume\n"); + } +} + +int kbase_instr_hwcnt_enable(struct kbase_context *kctx, + struct kbase_uk_hwcnt_setup *setup) +{ + struct kbase_device *kbdev; + int err; + + kbdev = kctx->kbdev; + + /* Mark the context as active so the GPU is kept turned on */ + /* A suspend won't happen here, because we're in a syscall from a + * userspace thread. */ + kbase_pm_context_active(kbdev); + + /* Schedule the context in */ + kbasep_js_schedule_privileged_ctx(kbdev, kctx); + err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, setup); + if (err) { + /* Release the context. This had its own Power Manager Active + * reference */ + kbasep_js_release_privileged_ctx(kbdev, kctx); + + /* Also release our Power Manager Active reference */ + kbase_pm_context_idle(kbdev); + } + + return err; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_enable); + +int kbase_instr_hwcnt_disable(struct kbase_context *kctx) +{ + int err = -EINVAL; + struct kbase_device *kbdev = kctx->kbdev; + + err = kbase_instr_hwcnt_disable_internal(kctx); + if (err) + goto out; + + /* Release the context. This had its own Power Manager Active reference + */ + kbasep_js_release_privileged_ctx(kbdev, kctx); + + /* Also release our Power Manager Active reference */ + kbase_pm_context_idle(kbdev); + + dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", + kctx); +out: + return err; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_disable); + +int kbase_instr_hwcnt_dump(struct kbase_context *kctx) +{ + int err; + + err = kbase_instr_hwcnt_request_dump(kctx); + if (err) + return err; + + err = kbase_instr_hwcnt_wait_for_dump(kctx); + return err; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump); + diff --git a/drivers/gpu/arm/midgard/mali_kbase_instr.h b/drivers/gpu/arm/midgard/mali_kbase_instr.h new file mode 100644 index 0000000000000..ac3355e53634d --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_instr.h @@ -0,0 +1,75 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
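/*
 * Typical calling sequence for the instrumentation API introduced above, as a
 * hedged sketch: enable collection on a context, trigger a synchronous dump,
 * then disable. The setup values are placeholders; real callers fill in
 * dump_buffer and the per-block bitmask fields first.
 */
static int ex_collect_once(struct kbase_context *kctx)
{
	struct kbase_uk_hwcnt_setup setup = { 0 };
	int err;

	/* caller would populate setup.dump_buffer and the bitmasks here */

	err = kbase_instr_hwcnt_enable(kctx, &setup);
	if (err)
		return err;

	/* Requests a dump and blocks until it completes. */
	err = kbase_instr_hwcnt_dump(kctx);

	kbase_instr_hwcnt_disable(kctx);
	return err;
}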
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Instrumentation API definitions + */ + +#ifndef _KBASE_INSTR_H_ +#define _KBASE_INSTR_H_ + +#include + +/** + * kbase_instr_hwcnt_enable() - Enable HW counters collection + * @kctx: Kbase context + * @setup: &struct kbase_uk_hwcnt_setup containing configuration + * + * Return: 0 on success + */ +int kbase_instr_hwcnt_enable(struct kbase_context *kctx, + struct kbase_uk_hwcnt_setup *setup); + +/** + * kbase_instr_hwcnt_disable() - Disable HW counters collection + * @kctx: Kbase context + * + * Return: 0 on success + */ +int kbase_instr_hwcnt_disable(struct kbase_context *kctx); + +/** + * kbase_instr_hwcnt_dump() - Trigger dump of HW counters and wait for + * completion + * @kctx: Kbase context + * + * Context: might sleep, waiting for dump to complete + * + * Return: 0 on success + */ +int kbase_instr_hwcnt_dump(struct kbase_context *kctx); + +/** + * kbase_instr_hwcnt_suspend() - GPU is suspending, stop HW counter collection + * @kbdev: Kbase device + * + * It's assumed that there's only one privileged context. + * + * Safe to do this without lock when doing an OS suspend, because it only + * changes in response to user-space IOCTLs + */ +void kbase_instr_hwcnt_suspend(struct kbase_device *kbdev); + +/** + * kbase_instr_hwcnt_resume() - GPU is resuming, resume HW counter collection + * @kbdev: Kbase device + */ +void kbase_instr_hwcnt_resume(struct kbase_device *kbdev); + +#endif /* _KBASE_INSTR_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_ipa.c b/drivers/gpu/arm/midgard/mali_kbase_ipa.c index c579d0a589f70..6ac97eb7937c6 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_ipa.c +++ b/drivers/gpu/arm/midgard/mali_kbase_ipa.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -135,7 +135,7 @@ static void init_ipa_groups(struct kbase_ipa_context *ctx) memcpy(ctx->groups, ipa_groups_def, sizeof(ctx->groups)); } -#if defined(CONFIG_OF) && (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) +#if defined(CONFIG_OF) && (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)) static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx) { struct kbase_device *kbdev = ctx->kbdev; @@ -145,7 +145,7 @@ static int update_ipa_groups_from_dt(struct kbase_ipa_context *ctx) size_t i; int err; - np = of_get_child_by_name(kbdev->dev->of_node, "ipa-groups"); + np = of_find_node_by_name(kbdev->dev->of_node, "ipa-groups"); if (!np) return 0; diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c index 3e0a5892cc7a4..1f9fbd9ee6d03 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_jd.c +++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. 
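/*
 * The mali_kbase_ipa.c hunk above swaps of_get_child_by_name() for
 * of_find_node_by_name(). The two are not equivalent: of_get_child_by_name()
 * matches only direct children of the given node, while
 * of_find_node_by_name() continues a tree-wide search from that node. A small
 * sketch of the child-lookup pattern; the property name is hypothetical:
 */
#include <linux/errno.h>
#include <linux/of.h>

static int ex_read_group_count(struct device_node *parent, u32 *out)
{
	struct device_node *np;
	int err;

	np = of_get_child_by_name(parent, "ipa-groups");
	if (!np)
		return -ENODEV;                /* optional node absent */

	err = of_property_read_u32(np, "group-count", out); /* hypothetical */
	of_node_put(np);                       /* drop the child reference */
	return err;
}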
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,15 +25,20 @@ #endif #include #include +#ifdef CONFIG_UMP +#include +#endif /* CONFIG_UMP */ #include #include #include +#include #include #include -#include -#include "mali_kbase_dma_fence.h" +#if defined(CONFIG_MALI_MIPE_ENABLED) +#include +#endif #define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) @@ -45,7 +50,7 @@ /* Return whether katom will run on the GPU or not. Currently only soft jobs and * dependency-only atoms do not run on the GPU */ #define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) || \ - ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == \ + ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == \ BASE_JD_REQ_DEP))) /* * This is the kernel side of the API. Only entry points are: @@ -80,23 +85,22 @@ static int jd_run_atom(struct kbase_jd_atom *katom) KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); - if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) { + if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) { /* Dependency only atom */ katom->status = KBASE_JD_ATOM_STATE_COMPLETED; return 0; } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { /* Soft-job */ - if (katom->will_fail_event_code) { - katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - return 0; - } - if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) + if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_SOFT_REPLAY) { if (!kbase_replay_process(katom)) katom->status = KBASE_JD_ATOM_STATE_COMPLETED; } else if (kbase_process_soft_job(katom) == 0) { kbase_finish_soft_job(katom); katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + } else { + /* The job has not completed */ + list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs); } return 0; } @@ -106,39 +110,6 @@ static int jd_run_atom(struct kbase_jd_atom *katom) return kbasep_js_add_job(kctx, katom); } -#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE) -void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom) -{ - struct kbase_device *kbdev; - - KBASE_DEBUG_ASSERT(katom); - kbdev = katom->kctx->kbdev; - KBASE_DEBUG_ASSERT(kbdev); - - /* Check whether the atom's other dependencies were already met. If - * katom is a GPU atom then the job scheduler may be able to represent - * the dependencies, hence we may attempt to submit it before they are - * met. Other atoms must have had both dependencies resolved. - */ - if (IS_GPU_ATOM(katom) || - (!kbase_jd_katom_dep_atom(&katom->dep[0]) && - !kbase_jd_katom_dep_atom(&katom->dep[1]))) { - /* katom dep complete, attempt to run it */ - bool resched = false; - - resched = jd_run_atom(katom); - - if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) { - /* The atom has already finished */ - resched |= jd_done_nolock(katom, NULL); - } - - if (resched) - kbase_js_sched_all(kbdev); - } -} -#endif - #ifdef CONFIG_KDS /* Add the katom to the kds waiting list. @@ -171,20 +142,44 @@ static void kds_dep_clear(void *callback_parameter, void *callback_extra_paramet { struct kbase_jd_atom *katom; struct kbase_jd_context *ctx; + struct kbase_device *kbdev; katom = (struct kbase_jd_atom *)callback_parameter; KBASE_DEBUG_ASSERT(katom); - ctx = &katom->kctx->jctx; + kbdev = katom->kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev); - /* If KDS resource has already been satisfied (e.g. due to zapping) - * do nothing. 
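/*
 * jd_run_atom() above distinguishes atom kinds in two ways: a multi-bit type
 * field is masked out of core_req and compared against a value
 * (dependency-only atoms), while soft jobs are detected by testing a single
 * flag bit. A standalone sketch of both tests with invented encodings:
 */
#include <stdbool.h>
#include <stdint.h>

#define EX_REQ_TYPE_MASK 0x000000ffu   /* low byte: atom type (invented) */
#define EX_REQ_TYPE_DEP  0x00000001u
#define EX_REQ_SOFT_JOB  0x00000100u   /* single flag bit (invented) */

static bool ex_is_dep_only(uint32_t core_req)
{
	return (core_req & EX_REQ_TYPE_MASK) == EX_REQ_TYPE_DEP;
}

static bool ex_is_soft_job(uint32_t core_req)
{
	return (core_req & EX_REQ_SOFT_JOB) != 0;
}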
- */ mutex_lock(&ctx->lock); - if (!katom->kds_dep_satisfied) { - katom->kds_dep_satisfied = true; - kbase_jd_dep_clear_locked(katom); + + /* KDS resource has already been satisfied (e.g. due to zapping) */ + if (katom->kds_dep_satisfied) + goto out; + + /* This atom's KDS dependency has now been met */ + katom->kds_dep_satisfied = true; + + /* Check whether the atom's other dependencies were already met. If + * katom is a GPU atom then the job scheduler may be able to represent + * the dependencies, hence we may attempt to submit it before they are + * met. Other atoms must have had both dependencies resolved */ + if (IS_GPU_ATOM(katom) || + (!kbase_jd_katom_dep_atom(&katom->dep[0]) && + !kbase_jd_katom_dep_atom(&katom->dep[1]))) { + /* katom dep complete, attempt to run it */ + bool resched = false; + + resched = jd_run_atom(katom); + + if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) { + /* The atom has already finished */ + resched |= jd_done_nolock(katom, NULL); + } + + if (resched) + kbase_js_sched_all(kbdev); } + out: mutex_unlock(&ctx->lock); } @@ -204,6 +199,208 @@ static void kbase_cancel_kds_wait_job(struct kbase_jd_atom *katom) } #endif /* CONFIG_KDS */ +static int kbase_jd_user_buf_map(struct kbase_context *kctx, + struct kbase_va_region *reg) +{ + long pinned_pages; + struct kbase_mem_phy_alloc *alloc; + struct page **pages; + phys_addr_t *pa; + long i; + int err = -ENOMEM; + unsigned long address; + struct task_struct *owner; + struct device *dev; + unsigned long offset; + unsigned long local_size; + + alloc = reg->gpu_alloc; + pa = kbase_get_gpu_phy_pages(reg); + address = alloc->imported.user_buf.address; + owner = alloc->imported.user_buf.owner; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); + + pages = alloc->imported.user_buf.pages; + + down_read(&owner->mm->mmap_sem); + pinned_pages = get_user_pages(owner, owner->mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR, + 0, pages, NULL); + up_read(&owner->mm->mmap_sem); + + if (pinned_pages <= 0) + return pinned_pages; + + if (pinned_pages != alloc->imported.user_buf.nr_pages) { + for (i = 0; i < pinned_pages; i++) + put_page(pages[i]); + return -ENOMEM; + } + + dev = kctx->kbdev->dev; + offset = address & ~PAGE_MASK; + local_size = alloc->imported.user_buf.size; + + for (i = 0; i < pinned_pages; i++) { + dma_addr_t dma_addr; + unsigned long min; + + min = MIN(PAGE_SIZE - offset, local_size); + dma_addr = dma_map_page(dev, pages[i], + offset, min, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, dma_addr)) + goto unwind; + + alloc->imported.user_buf.dma_addrs[i] = dma_addr; + pa[i] = page_to_phys(pages[i]); + + local_size -= min; + offset = 0; + } + + alloc->nents = pinned_pages; + + err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa, + kbase_reg_current_backed_size(reg), + reg->flags); + if (err == 0) + return 0; + + alloc->nents = 0; + /* fall down */ +unwind: + while (i--) { + dma_unmap_page(kctx->kbdev->dev, + alloc->imported.user_buf.dma_addrs[i], + PAGE_SIZE, DMA_BIDIRECTIONAL); + put_page(pages[i]); + pages[i] = NULL; + } + + return err; +} + +static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc, bool writeable) +{ + long i; + struct page **pages; + unsigned long size = alloc->imported.user_buf.size; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); + pages = alloc->imported.user_buf.pages; + for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { + unsigned long local_size; + 
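/*
 * The mapping loop in kbase_jd_user_buf_map() above follows the standard
 * streaming-DMA pattern: map each page, check dma_mapping_error(), and on
 * failure unwind every mapping made so far. A condensed kernel-style sketch
 * of that pattern (hypothetical helper, not the function above):
 */
#include <linux/dma-mapping.h>
#include <linux/mm.h>

static int ex_map_pages(struct device *dev, struct page **pages,
			dma_addr_t *dma_addrs, unsigned long nr_pages)
{
	unsigned long i;

	for (i = 0; i < nr_pages; i++) {
		dma_addrs[i] = dma_map_page(dev, pages[i], 0, PAGE_SIZE,
					    DMA_BIDIRECTIONAL);
		if (dma_mapping_error(dev, dma_addrs[i]))
			goto unwind;
	}
	return 0;

unwind:
	/* Undo only the mappings that succeeded, in reverse order. */
	while (i--)
		dma_unmap_page(dev, dma_addrs[i], PAGE_SIZE,
			       DMA_BIDIRECTIONAL);
	return -ENOMEM;
}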
dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; + + local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK)); + dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size, + DMA_BIDIRECTIONAL); + if (writeable) + set_page_dirty_lock(pages[i]); + put_page(pages[i]); + pages[i] = NULL; + + size -= local_size; + } + alloc->nents = 0; +} + +/* not to use sg_dma_len. */ +#define MALI_SG_DMA_LEN(sg) ((sg)->length) + +#ifdef CONFIG_DMA_SHARED_BUFFER +static int kbase_jd_umm_map(struct kbase_context *kctx, struct kbase_va_region *reg) +{ + struct sg_table *sgt; /* scatterlist_table */ + struct scatterlist *s; + int i; + phys_addr_t *pa; + int err; + size_t count = 0; + struct kbase_mem_phy_alloc *alloc; + + alloc = reg->gpu_alloc; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM); + KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt); + sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, DMA_BIDIRECTIONAL); + + if (IS_ERR_OR_NULL(sgt)) + return -EINVAL; + + /* save for later */ + alloc->imported.umm.sgt = sgt; + + pa = kbase_get_gpu_phy_pages(reg); + KBASE_DEBUG_ASSERT(pa); + + for_each_sg(sgt->sgl, s, sgt->nents, i) { + int j; + /* size_t pages = PFN_UP(sg_dma_len(s)); */ + size_t pages = PFN_UP(MALI_SG_DMA_LEN(s)); + + WARN_ONCE(MALI_SG_DMA_LEN(s) & (PAGE_SIZE-1), + "MALI_SG_DMA_LEN(s)=%u is not a multiple of PAGE_SIZE\n", + MALI_SG_DMA_LEN(s)); + /* + WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1), + "sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n", + sg_dma_len(s)); + */ + + WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1), + "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n", + (unsigned long long) sg_dma_address(s)); + + for (j = 0; (j < pages) && (count < reg->nr_pages); j++, count++) + *pa++ = sg_dma_address(s) + (j << PAGE_SHIFT); + + WARN_ONCE(j < pages, + "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n", + alloc->imported.umm.dma_buf->size); + } + + if (WARN_ONCE(count < reg->nr_pages, + "sg list from dma_buf_map_attachment < dma_buf->size=%zu, count : %zu, reg->nr_pages : %zu. \n", + alloc->imported.umm.dma_buf->size, + count, + reg->nr_pages)) { + err = -EINVAL; + goto out; + } + + /* Update nents as we now have pages to map */ + alloc->nents = count; + + err = kbase_mmu_insert_pages(kctx, reg->start_pfn, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD); + +out: + if (err) { + dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + alloc->imported.umm.sgt = NULL; + } + + return err; +} + +static void kbase_jd_umm_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc) +{ + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(alloc); + KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment); + KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt); + dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, + alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + alloc->imported.umm.sgt = NULL; + alloc->nents = 0; +} +#endif /* CONFIG_DMA_SHARED_BUFFER */ + void kbase_jd_free_external_resources(struct kbase_jd_atom *katom) { #ifdef CONFIG_KDS @@ -223,16 +420,6 @@ void kbase_jd_free_external_resources(struct kbase_jd_atom *katom) kds_resource_set_release_sync(&katom->kds_rset); } #endif /* CONFIG_KDS */ - -#ifdef CONFIG_MALI_DMA_FENCE - /* Flush dma-fence workqueue to ensure that any callbacks that may have - * been queued are done before continuing. 
- * Any successfully completed atom would have had all it's callbacks - * completed before the atom was run, so only flush for failed atoms. - */ - if (katom->event_code != BASE_JD_EVENT_DONE) - flush_workqueue(katom->kctx->dma_fence.wq); -#endif /* CONFIG_MALI_DMA_FENCE */ } static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) @@ -246,10 +433,6 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) katom->kds_dep_satisfied = true; #endif /* CONFIG_KDS */ -#ifdef CONFIG_MALI_DMA_FENCE - kbase_dma_fence_signal(katom); -#endif /* CONFIG_MALI_DMA_FENCE */ - kbase_gpu_vm_lock(katom->kctx); /* only roll back if extres is non-NULL */ if (katom->extres) { @@ -258,12 +441,56 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) res_no = katom->nr_extres; while (res_no-- > 0) { struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; - struct kbase_va_region *reg; - reg = kbase_region_tracker_find_region_base_address( - katom->kctx, - katom->extres[res_no].gpu_address); - kbase_unmap_external_resource(katom->kctx, reg, alloc); + switch (alloc->type) { +#ifdef CONFIG_DMA_SHARED_BUFFER + case KBASE_MEM_TYPE_IMPORTED_UMM: { + alloc->imported.umm.current_mapping_usage_count--; + + if (0 == alloc->imported.umm.current_mapping_usage_count) { + struct kbase_va_region *reg; + + reg = kbase_region_tracker_find_region_base_address( + katom->kctx, + katom->extres[res_no].gpu_address); + + if (reg && reg->gpu_alloc == alloc) + kbase_mmu_teardown_pages( + katom->kctx, + reg->start_pfn, + kbase_reg_current_backed_size(reg)); + + kbase_jd_umm_unmap(katom->kctx, alloc); + } + } + break; +#endif /* CONFIG_DMA_SHARED_BUFFER */ + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { + alloc->imported.user_buf.current_mapping_usage_count--; + + if (0 == alloc->imported.user_buf.current_mapping_usage_count) { + struct kbase_va_region *reg; + + reg = kbase_region_tracker_find_region_base_address( + katom->kctx, + katom->extres[res_no].gpu_address); + + if (reg && reg->gpu_alloc == alloc) + kbase_mmu_teardown_pages( + katom->kctx, + reg->start_pfn, + kbase_reg_current_backed_size(reg)); + + kbase_jd_user_buf_unmap(katom->kctx, + alloc, + reg->flags & KBASE_REG_GPU_WR); + } + } + break; + default: + break; + } + kbase_mem_phy_alloc_put(katom->extres[res_no].alloc); } kfree(katom->extres); katom->extres = NULL; @@ -271,6 +498,24 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) kbase_gpu_vm_unlock(katom->kctx); } +#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) || defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS) +static void add_kds_resource(struct kds_resource *kds_res, struct kds_resource **kds_resources, u32 *kds_res_count, unsigned long *kds_access_bitmap, bool exclusive) +{ + u32 i; + + for (i = 0; i < *kds_res_count; i++) { + /* Duplicate resource, ignore */ + if (kds_resources[i] == kds_res) + return; + } + + kds_resources[*kds_res_count] = kds_res; + if (exclusive) + set_bit(*kds_res_count, kds_access_bitmap); + (*kds_res_count)++; +} +#endif + /* * Set up external resources needed by this job. 
* @@ -286,11 +531,6 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st struct kds_resource **kds_resources = NULL; unsigned long *kds_access_bitmap = NULL; #endif /* CONFIG_KDS */ -#ifdef CONFIG_MALI_DMA_FENCE - struct kbase_dma_fence_resv_info info = { - .dma_fence_resv_count = 0, - }; -#endif struct base_external_resource *input_extres; KBASE_DEBUG_ASSERT(katom); @@ -326,53 +566,27 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st KBASE_DEBUG_ASSERT(0 != katom->nr_extres); kds_resources = kmalloc_array(katom->nr_extres, sizeof(struct kds_resource *), GFP_KERNEL); - if (!kds_resources) { + if (NULL == kds_resources) { err_ret_val = -ENOMEM; goto early_err_out; } KBASE_DEBUG_ASSERT(0 != katom->nr_extres); - kds_access_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres), - sizeof(unsigned long), - GFP_KERNEL); - if (!kds_access_bitmap) { - err_ret_val = -ENOMEM; - goto early_err_out; - } -#endif /* CONFIG_KDS */ - -#ifdef CONFIG_MALI_DMA_FENCE - info.resv_objs = kmalloc_array(katom->nr_extres, - sizeof(struct reservation_object *), - GFP_KERNEL); - if (!info.resv_objs) { - err_ret_val = -ENOMEM; - goto early_err_out; - } + kds_access_bitmap = kzalloc(sizeof(unsigned long) * ((katom->nr_extres + BITS_PER_LONG - 1) / BITS_PER_LONG), GFP_KERNEL); - info.dma_fence_excl_bitmap = kcalloc(BITS_TO_LONGS(katom->nr_extres), - sizeof(unsigned long), - GFP_KERNEL); - if (!info.dma_fence_excl_bitmap) { + if (NULL == kds_access_bitmap) { err_ret_val = -ENOMEM; goto early_err_out; } -#endif /* CONFIG_MALI_DMA_FENCE */ - - /* Take the processes mmap lock */ - down_read(¤t->mm->mmap_sem); +#endif /* CONFIG_KDS */ /* need to keep the GPU VM locked while we set up UMM buffers */ kbase_gpu_vm_lock(katom->kctx); for (res_no = 0; res_no < katom->nr_extres; res_no++) { struct base_external_resource *res; struct kbase_va_region *reg; - struct kbase_mem_phy_alloc *alloc; - bool exclusive; res = &input_extres[res_no]; - exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE) - ? 
true : false; reg = kbase_region_tracker_find_region_enclosing_address( katom->kctx, res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); @@ -384,31 +598,79 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) && (reg->flags & KBASE_REG_SECURE)) { - katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED; + katom->atom_flags |= KBASE_KATOM_FLAG_SECURE; + if ((katom->core_req & BASE_JD_REQ_FS) == 0) { + WARN_RATELIMIT(1, "Secure non-fragment jobs not supported"); + goto failed_loop; + } } - alloc = kbase_map_external_resource(katom->kctx, reg, - current->mm -#ifdef CONFIG_KDS - , &kds_res_count, kds_resources, - kds_access_bitmap, exclusive + /* decide what needs to happen for this resource */ + switch (reg->gpu_alloc->type) { + case BASE_MEM_IMPORT_TYPE_USER_BUFFER: { + reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; + if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) { + /* use a local variable to not pollute + * err_ret_val with a potential success + * value as some other gotos depend on + * the default error code stored in + * err_ret_val */ + int tmp; + + tmp = kbase_jd_user_buf_map(katom->kctx, + reg); + if (0 != tmp) { + /* failed to map this buffer, + * roll back */ + err_ret_val = tmp; + reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; + goto failed_loop; + } + } + } + break; + case BASE_MEM_IMPORT_TYPE_UMP: { +#if defined(CONFIG_KDS) && defined(CONFIG_UMP) + struct kds_resource *kds_res; + + kds_res = ump_dd_kds_resource_get(reg->gpu_alloc->imported.ump_handle); + if (kds_res) + add_kds_resource(kds_res, kds_resources, &kds_res_count, + kds_access_bitmap, + res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE); +#endif /*defined(CONFIG_KDS) && defined(CONFIG_UMP) */ + break; + } +#ifdef CONFIG_DMA_SHARED_BUFFER + case BASE_MEM_IMPORT_TYPE_UMM: { +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS + struct kds_resource *kds_res; + + kds_res = get_dma_buf_kds_resource(reg->gpu_alloc->imported.umm.dma_buf); + if (kds_res) + add_kds_resource(kds_res, kds_resources, &kds_res_count, kds_access_bitmap, res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE); #endif - ); - if (!alloc) { - err_ret_val = -EINVAL; - goto failed_loop; + reg->gpu_alloc->imported.umm.current_mapping_usage_count++; + if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) { + /* use a local variable to not pollute err_ret_val + * with a potential success value as some other gotos depend + * on the default error code stored in err_ret_val */ + int tmp; + + tmp = kbase_jd_umm_map(katom->kctx, reg); + if (tmp) { + /* failed to map this buffer, roll back */ + err_ret_val = tmp; + reg->gpu_alloc->imported.umm.current_mapping_usage_count--; + goto failed_loop; + } + } + break; } - -#ifdef CONFIG_MALI_DMA_FENCE - if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { - struct reservation_object *resv; - - resv = reg->gpu_alloc->imported.umm.dma_buf->resv; - if (resv) - kbase_dma_fence_add_reservation(resv, &info, - exclusive); +#endif + default: + goto failed_loop; } -#endif /* CONFIG_MALI_DMA_FENCE */ /* finish with updating out array with the data we found */ /* NOTE: It is important that this is the last thing we do (or @@ -417,15 +679,12 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st * until the last read for an element. 
* */ katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */ - katom->extres[res_no].alloc = alloc; + katom->extres[res_no].alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); } /* successfully parsed the extres array */ /* drop the vm lock before we call into kds */ kbase_gpu_vm_unlock(katom->kctx); - /* Release the processes mmap lock */ - up_read(¤t->mm->mmap_sem); - #ifdef CONFIG_KDS if (kds_res_count) { int wait_failed; @@ -450,63 +709,46 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st kfree(kds_access_bitmap); #endif /* CONFIG_KDS */ -#ifdef CONFIG_MALI_DMA_FENCE - if (info.dma_fence_resv_count) { - int ret; - - ret = kbase_dma_fence_wait(katom, &info); - if (ret < 0) - goto failed_dma_fence_setup; - } - - kfree(info.resv_objs); - kfree(info.dma_fence_excl_bitmap); -#endif /* CONFIG_MALI_DMA_FENCE */ - /* all done OK */ return 0; /* error handling section */ -#ifdef CONFIG_MALI_DMA_FENCE -failed_dma_fence_setup: #ifdef CONFIG_KDS - /* If we are here, dma_fence setup failed but KDS didn't. - * Revert KDS setup if any. - */ - if (kds_res_count) { - mutex_unlock(&katom->kctx->jctx.lock); - kds_resource_set_release_sync(&katom->kds_rset); - mutex_lock(&katom->kctx->jctx.lock); - - kbase_jd_kds_waiters_remove(katom); - katom->kds_dep_satisfied = true; - } -#endif /* CONFIG_KDS */ -#endif /* CONFIG_MALI_DMA_FENCE */ -#ifdef CONFIG_KDS -failed_kds_setup: -#endif -#if defined(CONFIG_KDS) || defined(CONFIG_MALI_DMA_FENCE) - /* Lock the processes mmap lock */ - down_read(¤t->mm->mmap_sem); + failed_kds_setup: /* lock before we unmap */ kbase_gpu_vm_lock(katom->kctx); -#endif +#endif /* CONFIG_KDS */ failed_loop: /* undo the loop work */ while (res_no-- > 0) { struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; +#ifdef CONFIG_DMA_SHARED_BUFFER + if (alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { + alloc->imported.umm.current_mapping_usage_count--; + + if (0 == alloc->imported.umm.current_mapping_usage_count) { + struct kbase_va_region *reg; + + reg = kbase_region_tracker_find_region_base_address( + katom->kctx, + katom->extres[res_no].gpu_address); - kbase_unmap_external_resource(katom->kctx, NULL, alloc); + if (reg && reg->gpu_alloc == alloc) + kbase_mmu_teardown_pages(katom->kctx, + reg->start_pfn, + kbase_reg_current_backed_size(reg)); + + kbase_jd_umm_unmap(katom->kctx, alloc); + } + } +#endif /* CONFIG_DMA_SHARED_BUFFER */ + kbase_mem_phy_alloc_put(alloc); } kbase_gpu_vm_unlock(katom->kctx); - /* Release the processes mmap lock */ - up_read(¤t->mm->mmap_sem); - early_err_out: kfree(katom->extres); katom->extres = NULL; @@ -514,33 +756,35 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st kfree(kds_resources); kfree(kds_access_bitmap); #endif /* CONFIG_KDS */ -#ifdef CONFIG_MALI_DMA_FENCE - kfree(info.resv_objs); - kfree(info.dma_fence_excl_bitmap); -#endif return err_ret_val; } static inline void jd_resolve_dep(struct list_head *out_list, struct kbase_jd_atom *katom, - u8 d, bool ctx_is_dying) + u8 d, + bool ctx_is_dying) { u8 other_d = !d; while (!list_empty(&katom->dep_head[d])) { struct kbase_jd_atom *dep_atom; - struct kbase_jd_atom *other_dep_atom; u8 dep_type; dep_atom = list_entry(katom->dep_head[d].next, struct kbase_jd_atom, dep_item[d]); + list_del(katom->dep_head[d].next); dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]); kbase_jd_katom_dep_clear(&dep_atom->dep[d]); if (katom->event_code != BASE_JD_EVENT_DONE && - 
(dep_type != BASE_JD_DEP_TYPE_ORDER)) { + (dep_type != BASE_JD_DEP_TYPE_ORDER || ctx_is_dying)) { + /* Atom failed, so remove the other dependencies and immediately fail the atom */ + if (kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) { + list_del(&dep_atom->dep_item[other_d]); + kbase_jd_katom_dep_clear(&dep_atom->dep[other_d]); + } #ifdef CONFIG_KDS if (!dep_atom->kds_dep_satisfied) { /* Just set kds_dep_satisfied to true. If the callback happens after this then it will early out and @@ -550,67 +794,17 @@ static inline void jd_resolve_dep(struct list_head *out_list, } #endif -#ifdef CONFIG_MALI_DMA_FENCE - kbase_dma_fence_cancel_callbacks(dep_atom); -#endif - dep_atom->event_code = katom->event_code; KBASE_DEBUG_ASSERT(dep_atom->status != KBASE_JD_ATOM_STATE_UNUSED); + dep_atom->status = KBASE_JD_ATOM_STATE_COMPLETED; - if ((dep_atom->core_req & BASE_JD_REQ_SOFT_REPLAY) - != BASE_JD_REQ_SOFT_REPLAY) { - dep_atom->will_fail_event_code = - dep_atom->event_code; - } else { - dep_atom->status = - KBASE_JD_ATOM_STATE_COMPLETED; - } - } - other_dep_atom = (struct kbase_jd_atom *) - kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]); - - if (!dep_atom->in_jd_list && (!other_dep_atom || - (IS_GPU_ATOM(dep_atom) && !ctx_is_dying && - !dep_atom->will_fail_event_code && - !other_dep_atom->will_fail_event_code))) { - bool dep_satisfied = true; -#ifdef CONFIG_MALI_DMA_FENCE - int dep_count; - - dep_count = atomic_read(&dep_atom->dma_fence.dep_count); - if (likely(dep_count == -1)) { - dep_satisfied = true; - } else if (dep_count == 0) { - /* - * All fences for this atom has signaled, but - * the worker that will queue the atom has not - * yet run. - * - * Mark the atom as handled by setting - * dep_count to -1 so that the worker doesn't - * queue the atom again. - */ - atomic_set(&dep_atom->dma_fence.dep_count, -1); - /* - * Remove the atom from the list of dma-fence - * waiting atoms. 
- */ - kbase_dma_fence_waiters_remove(dep_atom); - dep_satisfied = true; - } else { - dep_satisfied = false; - } -#endif /* CONFIG_MALI_DMA_FENCE */ - + list_add_tail(&dep_atom->dep_item[0], out_list); + } else if (!kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) { #ifdef CONFIG_KDS - dep_satisfied = dep_satisfied && dep_atom->kds_dep_satisfied; + if (dep_atom->kds_dep_satisfied) #endif - - if (dep_satisfied) { - dep_atom->in_jd_list = true; - list_add_tail(&dep_atom->jd_item, out_list); - } + list_add_tail(&dep_atom->dep_item[0], out_list); } } } @@ -653,7 +847,7 @@ static void jd_check_force_failure(struct kbase_jd_atom *katom) kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) { struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i]; - if ((dep_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) == + if ((dep_atom->core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_SOFT_REPLAY && (dep_atom->core_req & kbdev->force_replay_core_req) == kbdev->force_replay_core_req) { @@ -665,36 +859,6 @@ static void jd_check_force_failure(struct kbase_jd_atom *katom) } #endif -static void jd_try_submitting_deps(struct list_head *out_list, - struct kbase_jd_atom *node) -{ - int i; - - for (i = 0; i < 2; i++) { - struct list_head *pos; - - list_for_each(pos, &node->dep_head[i]) { - struct kbase_jd_atom *dep_atom = list_entry(pos, - struct kbase_jd_atom, dep_item[i]); - - if (IS_GPU_ATOM(dep_atom) && !dep_atom->in_jd_list) { - /*Check if atom deps look sane*/ - bool dep0_valid = !dep_atom->dep[0].atom || - (dep_atom->dep[0].atom->status - >= KBASE_JD_ATOM_STATE_IN_JS); - bool dep1_valid = !dep_atom->dep[1].atom || - (dep_atom->dep[1].atom->status - >= KBASE_JD_ATOM_STATE_IN_JS); - - if (dep0_valid && dep1_valid) { - dep_atom->in_jd_list = true; - list_add(&dep_atom->jd_item, out_list); - } - } - } - } -} - /* * Perform the necessary handling of an atom that has finished running * on the GPU. 
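Illustrative sketch, not part of the patch: the hunk below reworks jd_done_nolock(), but the function keeps its two-list worklist shape. Completed atoms are drained from one list, each completion lets jd_resolve_dep() promote now-unblocked dependents onto a runnable list, and any dependent that completes immediately is appended back to the completed list until both lists drain. The standalone program below models only that control flow; struct demo_atom, demo_resolve_deps() and the fixed-size arrays are invented for the sketch and are not kbase types.

#include <stdio.h>

struct demo_atom {
	const char *name;
	int deps_left;			/* unresolved dependencies */
	struct demo_atom *dependents[2];
};

/* Resolve @katom's outgoing dependencies; newly unblocked dependents are
 * appended to the runnable worklist, mirroring jd_resolve_dep(). */
static void demo_resolve_deps(struct demo_atom *katom,
			      struct demo_atom **runnable, int *nr_runnable)
{
	int i;

	for (i = 0; i < 2; i++) {
		struct demo_atom *dep = katom->dependents[i];

		if (dep && --dep->deps_left == 0)
			runnable[(*nr_runnable)++] = dep;
	}
}

int main(void)
{
	struct demo_atom c = { .name = "C", .deps_left = 2 };
	struct demo_atom b = { .name = "B", .deps_left = 1,
			       .dependents = { &c } };
	struct demo_atom a = { .name = "A", .deps_left = 0,
			       .dependents = { &b, &c } };
	struct demo_atom *completed[3] = { &a };
	struct demo_atom *runnable[3];
	int nr_completed = 1, nr_runnable = 0;

	while (nr_completed) {
		struct demo_atom *katom = completed[--nr_completed];

		demo_resolve_deps(katom, runnable, &nr_runnable);
		printf("completed %s\n", katom->name);

		while (nr_runnable) {
			/* jd_done_nolock() would run the atom here; the
			 * sketch treats every dependent as completing at
			 * once, so it simply joins the completed list. */
			completed[nr_completed++] = runnable[--nr_runnable];
		}
	}
	return 0;
}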
@@ -709,6 +873,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, { struct kbase_context *kctx = katom->kctx; struct kbase_device *kbdev = kctx->kbdev; + struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; struct list_head completed_jobs; struct list_head runnable_jobs; bool need_to_try_schedule_context = false; @@ -723,6 +888,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, jd_check_force_failure(katom); #endif + /* This is needed in case an atom is failed due to being invalid, this * can happen *before* the jobs that the atom depends on have completed */ for (i = 0; i < 2; i++) { @@ -749,16 +915,17 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, } katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - list_add_tail(&katom->jd_item, &completed_jobs); + list_add_tail(&katom->dep_item[0], &completed_jobs); while (!list_empty(&completed_jobs)) { - katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, jd_item); + katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, dep_item[0]); list_del(completed_jobs.prev); + KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); for (i = 0; i < 2; i++) jd_resolve_dep(&runnable_jobs, katom, i, - kctx->jctx.sched_info.ctx.is_dying); + js_kctx_info->ctx.is_dying); if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) kbase_jd_post_external_resources(katom); @@ -767,47 +934,35 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, struct kbase_jd_atom *node; node = list_entry(runnable_jobs.next, - struct kbase_jd_atom, jd_item); + struct kbase_jd_atom, dep_item[0]); + list_del(runnable_jobs.next); - node->in_jd_list = false; KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); - if (node->status != KBASE_JD_ATOM_STATE_COMPLETED && - !kctx->jctx.sched_info.ctx.is_dying) { + if (node->status != KBASE_JD_ATOM_STATE_COMPLETED) { need_to_try_schedule_context |= jd_run_atom(node); } else { node->event_code = katom->event_code; - if ((node->core_req & - BASE_JD_REQ_SOFT_JOB_TYPE) == - BASE_JD_REQ_SOFT_REPLAY) { + if ((node->core_req & BASEP_JD_REQ_ATOM_TYPE) + == BASE_JD_REQ_SOFT_REPLAY) { if (kbase_replay_process(node)) /* Don't complete this atom */ continue; } else if (node->core_req & BASE_JD_REQ_SOFT_JOB) { - /* If this is a fence wait soft job - * then remove it from the list of sync - * waiters. - */ + /* If this is a fence wait then remove it from the list of sync waiters. 
*/ if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req) - kbasep_remove_waiting_soft_job(node); + list_del(&node->dep_item[0]); kbase_finish_soft_job(node); } node->status = KBASE_JD_ATOM_STATE_COMPLETED; } - if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) { - list_add_tail(&node->jd_item, &completed_jobs); - } else if (node->status == KBASE_JD_ATOM_STATE_IN_JS && - !node->will_fail_event_code) { - /* Node successfully submitted, try submitting - * dependencies as they may now be representable - * in JS */ - jd_try_submitting_deps(&runnable_jobs, node); - } + if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) + list_add_tail(&node->dep_item[0], &completed_jobs); } /* Register a completed job as a disjoint event when the GPU @@ -815,7 +970,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, */ kbase_disjoint_event_potential(kctx->kbdev); if (completed_jobs_ctx) - list_add_tail(&katom->jd_item, completed_jobs_ctx); + list_add_tail(&katom->dep_item[0], completed_jobs_ctx); else kbase_event_post(kctx, katom); @@ -887,19 +1042,23 @@ static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req) } #endif -bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom) +bool jd_submit_atom(struct kbase_context *kctx, + const struct base_jd_atom_v2 *user_atom, + struct kbase_jd_atom *katom) { struct kbase_jd_context *jctx = &kctx->jctx; + base_jd_core_req core_req; int queued = 0; int i; int sched_prio; bool ret; - bool will_fail = false; /* Update the TOTAL number of jobs. This includes those not tracked by * the scheduler: 'not ready to run' and 'dependency-only' jobs. */ jctx->job_nr++; + core_req = user_atom->core_req; + katom->start_timestamp.tv64 = 0; katom->time_spent_us = 0; katom->udata = user_atom->udata; @@ -910,28 +1069,18 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us katom->affinity = 0; katom->jc = user_atom->jc; katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; - katom->core_req = user_atom->core_req; + katom->core_req = core_req; katom->atom_flags = 0; katom->retry_count = 0; katom->need_cache_flush_cores_retained = 0; - katom->pre_dep = NULL; - katom->post_dep = NULL; katom->x_pre_dep = NULL; katom->x_post_dep = NULL; - katom->will_fail_event_code = BASE_JD_EVENT_NOT_STARTED; - katom->exit_protected_state = KBASE_ATOM_EXIT_PROTECTED_CHECK; - katom->age = kctx->age_count++; - - INIT_LIST_HEAD(&katom->jd_item); #ifdef CONFIG_KDS /* Start by assuming that the KDS dependencies are satisfied, * kbase_jd_pre_external_resources will correct this if there are dependencies */ katom->kds_dep_satisfied = true; katom->kds_rset = NULL; #endif /* CONFIG_KDS */ -#ifdef CONFIG_MALI_DMA_FENCE - atomic_set(&katom->dma_fence.dep_count, -1); -#endif /* Don't do anything if there is a mess up with dependencies. This is done in a separate cycle to check both the dependencies at ones, otherwise @@ -947,7 +1096,7 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us dep_atom_type != BASE_JD_DEP_TYPE_DATA) { katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT; katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - +#if defined(CONFIG_MALI_MIPE_ENABLED) /* Wrong dependency setup. Atom will be sent * back to user space. Do not record any * dependencies. 
*/ @@ -956,7 +1105,7 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us kbase_jd_atom_id(kctx, katom)); kbase_tlstream_tl_ret_atom_ctx( katom, kctx); - +#endif ret = jd_done_nolock(katom, NULL); goto out; } @@ -989,10 +1138,16 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us continue; } + if (i == 1 && kbase_jd_katom_dep_atom(&katom->dep[0])) { + /* Remove the previous dependency */ + list_del(&katom->dep_item[0]); + kbase_jd_katom_dep_clear(&katom->dep[0]); + } + /* Atom has completed, propagate the error code if any */ katom->event_code = dep_atom->event_code; katom->status = KBASE_JD_ATOM_STATE_QUEUED; - +#if defined(CONFIG_MALI_MIPE_ENABLED) /* This atom is going through soft replay or * will be sent back to user space. Do not record any * dependencies. */ @@ -1000,16 +1155,17 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us katom, kbase_jd_atom_id(kctx, katom)); kbase_tlstream_tl_ret_atom_ctx(katom, kctx); - - if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) +#endif + if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_SOFT_REPLAY) { if (kbase_replay_process(katom)) { ret = false; goto out; } } - will_fail = true; + ret = jd_done_nolock(katom, NULL); + goto out; } else { /* Atom is in progress, add this atom to the list */ list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]); @@ -1018,25 +1174,12 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us } } - if (will_fail) { - if (!queued) { - ret = jd_done_nolock(katom, NULL); - - goto out; - } else { - katom->will_fail_event_code = katom->event_code; - ret = false; - - goto out; - } - } else { - /* These must occur after the above loop to ensure that an atom - * that depends on a previous atom with the same number behaves - * as expected */ - katom->event_code = BASE_JD_EVENT_DONE; - katom->status = KBASE_JD_ATOM_STATE_QUEUED; - } + /* These must occur after the above loop to ensure that an atom that + * depends on a previous atom with the same number behaves as expected */ + katom->event_code = BASE_JD_EVENT_DONE; + katom->status = KBASE_JD_ATOM_STATE_QUEUED; +#if defined(CONFIG_MALI_MIPE_ENABLED) /* Create a new atom recording all dependencies it was set up with. */ kbase_tlstream_tl_new_atom( katom, @@ -1044,26 +1187,15 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us kbase_tlstream_tl_ret_atom_ctx(katom, kctx); for (i = 0; i < 2; i++) if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type( - &katom->dep[i])) { + &katom->dep[i])) kbase_tlstream_tl_dep_atom_atom( (void *)kbase_jd_katom_dep_atom( &katom->dep[i]), (void *)katom); - } else if (BASE_JD_DEP_TYPE_INVALID != - user_atom->pre_dep[i].dependency_type) { - /* Resolved dependency. 
*/ - int dep_atom_number = - user_atom->pre_dep[i].atom_id; - struct kbase_jd_atom *dep_atom = - &jctx->atoms[dep_atom_number]; - - kbase_tlstream_tl_rdep_atom_atom( - (void *)dep_atom, - (void *)katom); - } +#endif /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */ - if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { + if (!katom->jc && (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL"); katom->event_code = BASE_JD_EVENT_JOB_INVALID; ret = jd_done_nolock(katom, NULL); @@ -1081,17 +1213,6 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us goto out; } - /* Reject atoms with invalid core requirements */ - if ((katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) && - (katom->core_req & BASE_JD_REQ_EVENT_COALESCE)) { - dev_warn(kctx->kbdev->dev, - "Rejecting atom with invalid core requirements"); - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE; - ret = jd_done_nolock(katom, NULL); - goto out; - } - /* For invalid priority, be most lenient and choose the default */ sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID) @@ -1148,15 +1269,7 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us } #endif /* CONFIG_KDS */ - -#ifdef CONFIG_MALI_DMA_FENCE - if (atomic_read(&katom->dma_fence.dep_count) != -1) { - ret = false; - goto out; - } -#endif /* CONFIG_MALI_DMA_FENCE */ - - if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) + if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_SOFT_REPLAY) { if (kbase_replay_process(katom)) ret = false; @@ -1170,9 +1283,10 @@ bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *us ret = jd_done_nolock(katom, NULL); goto out; } - + /* The job has not yet completed */ + list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs); ret = false; - } else if ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { + } else if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { katom->status = KBASE_JD_ATOM_STATE_IN_JS; ret = kbasep_js_add_job(kctx, katom); /* If job was cancelled then resolve immediately */ @@ -1258,7 +1372,7 @@ int kbase_jd_submit(struct kbase_context *kctx, user_atom.udata = user_atom_v6.udata; user_atom.extres_list = user_atom_v6.extres_list; user_atom.nr_extres = user_atom_v6.nr_extres; - user_atom.core_req = (u32)(user_atom_v6.core_req & 0x7fff); + user_atom.core_req = user_atom_v6.core_req; /* atom number 0 is used for no dependency atoms */ if (!user_atom_v6.pre_dep[0]) @@ -1290,12 +1404,6 @@ int kbase_jd_submit(struct kbase_context *kctx, } #endif /* BASE_LEGACY_UK6_SUPPORT */ -#ifdef BASE_LEGACY_UK10_2_SUPPORT - if (KBASE_API_VERSION(10, 3) > kctx->api_version) - user_atom.core_req = (u32)(user_atom.compat_core_req - & 0x7fff); -#endif /* BASE_LEGACY_UK10_2_SUPPORT */ - user_addr = (void __user *)((uintptr_t) user_addr + submit_data->stride); mutex_lock(&jctx->lock); @@ -1370,6 +1478,7 @@ void kbase_jd_done_worker(struct work_struct *data) struct kbase_jd_context *jctx; struct kbase_context *kctx; struct kbasep_js_kctx_info *js_kctx_info; + union kbasep_js_policy *js_policy; struct kbase_device *kbdev; struct kbasep_js_device_data *js_devdata; u64 cache_jc = katom->jc; @@ -1388,6 +1497,7 @@ void kbase_jd_done_worker(struct work_struct *data) kbdev = kctx->kbdev; js_kctx_info = 
&kctx->jctx.sched_info; js_devdata = &kbdev->js_data; + js_policy = &kbdev->js_data.policy; KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0); @@ -1412,6 +1522,7 @@ void kbase_jd_done_worker(struct work_struct *data) mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); + mutex_unlock(&jctx->lock); spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); @@ -1419,7 +1530,6 @@ void kbase_jd_done_worker(struct work_struct *data) kbase_js_unpull(kctx, katom); spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - mutex_unlock(&jctx->lock); return; } @@ -1529,7 +1639,7 @@ void kbase_jd_done_worker(struct work_struct *data) while (!list_empty(&kctx->completed_jobs)) { struct kbase_jd_atom *atom = list_entry( kctx->completed_jobs.next, - struct kbase_jd_atom, jd_item); + struct kbase_jd_atom, dep_item[0]); list_del(kctx->completed_jobs.next); kbase_event_post(kctx, atom); @@ -1604,6 +1714,51 @@ static void jd_cancel_worker(struct work_struct *data) kbase_js_sched_all(kbdev); } +/** + * jd_evict_worker - Work queue job evict function + * @data: a &struct work_struct + * + * Only called as part of evicting failed jobs. This is only called on jobs that + * were never submitted to HW Access. Jobs that were submitted are handled + * through kbase_jd_done_worker(). + * Operates serially with the kbase_jd_done_worker() on the work queue. + * + * We don't need to release most of the resources that would occur on + * kbase_jd_done() or kbase_jd_done_worker(), because the atoms here must not be + * running (by virtue of having not been submitted to HW Access). + */ +static void jd_evict_worker(struct work_struct *data) +{ + struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, + work); + struct kbase_jd_context *jctx; + struct kbase_context *kctx; + struct kbasep_js_kctx_info *js_kctx_info; + struct kbase_device *kbdev; + + /* Soft jobs should never reach this function */ + KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); + + kctx = katom->kctx; + kbdev = kctx->kbdev; + jctx = &kctx->jctx; + js_kctx_info = &kctx->jctx.sched_info; + + KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0); + + /* Scheduler: Remove the job from the system */ + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + kbasep_js_remove_cancelled_job(kbdev, kctx, katom); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + mutex_lock(&jctx->lock); + jd_done_nolock(katom, NULL); + /* katom may have been freed now, do not use! 
*/ + mutex_unlock(&jctx->lock); + + kbase_js_sched_all(kbdev); +} + /** * kbase_jd_done - Complete a job that has been removed from the Hardware * @katom: atom which has been completed @@ -1647,8 +1802,7 @@ void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, #ifdef CONFIG_DEBUG_FS /* a failed job happened and is waiting for dumping*/ - if (!katom->will_fail_event_code && - kbase_debug_job_fault_process(katom, katom->event_code)) + if (kbase_debug_job_fault_process(katom, katom->event_code)) return; #endif @@ -1686,6 +1840,30 @@ void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) queue_work(kctx->jctx.job_done_wq, &katom->work); } +void kbase_jd_evict(struct kbase_device *kbdev, struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx; + struct kbasep_js_kctx_info *js_kctx_info; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + KBASE_DEBUG_ASSERT(NULL != katom); + kctx = katom->kctx; + KBASE_DEBUG_ASSERT(NULL != kctx); + + js_kctx_info = &kctx->jctx.sched_info; + + KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); + + /* This should only be done from a context that is currently scheduled + */ + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled); + + WARN_ON(work_pending(&katom->work)); + + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); + INIT_WORK(&katom->work, jd_evict_worker); + queue_work(kctx->jctx.job_done_wq, &katom->work); +} void kbase_jd_zap_context(struct kbase_context *kctx) { @@ -1708,9 +1886,8 @@ void kbase_jd_zap_context(struct kbase_context *kctx) * queued outside the job scheduler. */ - del_timer_sync(&kctx->soft_job_timeout); list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { - katom = list_entry(entry, struct kbase_jd_atom, queue); + katom = list_entry(entry, struct kbase_jd_atom, dep_item[0]); kbase_cancel_soft_job(katom); } @@ -1733,19 +1910,8 @@ void kbase_jd_zap_context(struct kbase_context *kctx) } #endif -#ifdef CONFIG_MALI_DMA_FENCE - kbase_dma_fence_cancel_all_atoms(kctx); -#endif - mutex_unlock(&kctx->jctx.lock); -#ifdef CONFIG_MALI_DMA_FENCE - /* Flush dma-fence workqueue to ensure that any callbacks that may have - * been queued are done before continuing. - */ - flush_workqueue(kctx->dma_fence.wq); -#endif - kbase_jm_wait_for_zero_jobs(kctx); } @@ -1761,8 +1927,7 @@ int kbase_jd_init(struct kbase_context *kctx) KBASE_DEBUG_ASSERT(kctx); - kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", - WQ_HIGHPRI | WQ_UNBOUND, 1); + kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", 0, 1); if (NULL == kctx->jctx.job_done_wq) { mali_err = -ENOMEM; goto out1; @@ -1777,12 +1942,6 @@ int kbase_jd_init(struct kbase_context *kctx) /* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */ kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID; kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED; - -#ifdef CONFIG_MALI_DMA_FENCE - kctx->jctx.atoms[i].dma_fence.context = fence_context_alloc(1); - atomic_set(&kctx->jctx.atoms[i].dma_fence.seqno, 0); - INIT_LIST_HEAD(&kctx->jctx.atoms[i].dma_fence.callbacks); -#endif } mutex_init(&kctx->jctx.lock); diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c index ac6c3ce333ed7..78761e69d04df 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js.c +++ b/drivers/gpu/arm/midgard/mali_kbase_js.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,7 +25,9 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #include #endif +#if defined(CONFIG_MALI_MIPE_ENABLED) #include +#endif #include #include @@ -77,6 +79,13 @@ static int kbase_js_get_slot(struct kbase_device *kbdev, static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, kbasep_js_policy_ctx_job_cb callback); +static bool kbase_js_evict_atom(struct kbase_context *kctx, + struct kbase_jd_atom *katom_evict, + struct kbase_jd_atom *start_katom, + struct kbase_jd_atom *head_katom, + struct list_head *evict_list, + struct jsctx_rb *rb, int idx); + /* Helper for trace subcodes */ #if KBASE_TRACE_ENABLE static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev, @@ -230,6 +239,26 @@ bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, return result; } +/** + * jsctx_rb_is_empty_prio(): - Check if ring buffer is empty + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to check. + * @prio: Priority to check. + * + * Caller must hold runpool_irq.lock + * + * Return: true if the ring buffer is empty, false otherwise. + */ +static inline bool +jsctx_rb_is_empty_prio(struct kbase_context *kctx, int js, int prio) +{ + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + + return rb->running_idx == rb->write_idx; +} + /** * jsctx_rb_none_to_pull_prio(): - Check if there are no pullable atoms * @kctx: Pointer to kbase context with ring buffer. @@ -241,16 +270,18 @@ bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, * ring buffer to be full (with running atoms) when this functions returns * true. * + * Caller must hold runpool_irq.lock + * * Return: true if there are no atoms to pull, false otherwise. */ static inline bool jsctx_rb_none_to_pull_prio(struct kbase_context *kctx, int js, int prio) { - struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - return RB_EMPTY_ROOT(&rb->runnable_tree); + return rb->read_idx == rb->write_idx; } /** @@ -280,68 +311,115 @@ jsctx_rb_none_to_pull(struct kbase_context *kctx, int js) } /** - * jsctx_queue_foreach_prio(): - Execute callback for each entry in the queue. - * @kctx: Pointer to kbase context with the queue. + * jsctx_rb_compact_prio(): - Compact a ring buffer + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to compact. + * @prio: Priority id to compact. + */ +static inline void +jsctx_rb_compact_prio(struct kbase_context *kctx, int js, int prio) +{ + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + u16 compact_idx = rb->write_idx - 1; + u16 end_idx = rb->running_idx - 1; + u16 i; + + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->jctx.lock); + + for (i = compact_idx; i != end_idx; i--) { + if (rb->entries[i & JSCTX_RB_MASK].atom_id != + KBASEP_ATOM_ID_INVALID) { + WARN_ON(compact_idx < rb->running_idx); + rb->entries[compact_idx & JSCTX_RB_MASK].atom_id = + rb->entries[i & JSCTX_RB_MASK].atom_id; + + compact_idx--; + } + if (rb->read_idx == i) + rb->read_idx = compact_idx + 1; + } + + rb->running_idx = compact_idx + 1; +} + +/** + * jsctx_rb_compact(): - Compact all priority ring buffers + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to compact. 
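+ *
+ * Compaction squeezes out any entries whose atom_id has been set to
+ * KBASEP_ATOM_ID_INVALID, so that the window between running_idx and
+ * write_idx once again refers only to valid atoms.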
+ */ +static inline void +jsctx_rb_compact(struct kbase_context *kctx, int js) +{ + int prio; + + for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) + jsctx_rb_compact_prio(kctx, js, prio); +} + +/** + * jsctx_rb_foreach_prio(): - Execute callback for each entry in ring buffer + * @kctx: Pointer to kbase context with ring buffer. * @js: Job slot id to iterate. * @prio: Priority id to iterate. * @callback: Function pointer to callback. * - * Iterate over a queue and invoke @callback for each entry in the queue, and - * remove the entry from the queue. + * Iterate over a ring buffer and invoke @callback for each entry in buffer, and + * remove the entry from the buffer. * - * If entries are added to the queue while this is running those entries may, or - * may not be covered. To ensure that all entries in the buffer have been - * enumerated when this function returns jsctx->lock must be held when calling - * this function. + * If entries are added to the ring buffer while this is running those entries + * may, or may not be covered. To ensure that all entries in the buffer have + * been enumerated when this function returns jsctx->lock must be held when + * calling this function. * * The HW access lock, js_data.runpool_irq.lock, must always be held when * calling this function. */ static void -jsctx_queue_foreach_prio(struct kbase_context *kctx, int js, int prio, +jsctx_rb_foreach_prio(struct kbase_context *kctx, int js, int prio, kbasep_js_policy_ctx_job_cb callback) { - struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + struct kbase_jd_atom *katom; + u16 write_idx = ACCESS_ONCE(rb->write_idx); lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - while (!RB_EMPTY_ROOT(&queue->runnable_tree)) { - struct rb_node *node = rb_first(&queue->runnable_tree); - struct kbase_jd_atom *entry = rb_entry(node, - struct kbase_jd_atom, runnable_tree_node); + /* There must be no jobs currently in HW access */ + WARN_ON(rb->read_idx != rb->running_idx); - rb_erase(node, &queue->runnable_tree); - callback(kctx->kbdev, entry); - } + /* Invoke callback on all kbase_jd_atoms in the ring buffer, and + * removes them from the buffer */ + while (rb->read_idx != write_idx) { + int id = rb->entries[rb->read_idx & JSCTX_RB_MASK].atom_id; - while (!list_empty(&queue->x_dep_head)) { - struct kbase_jd_atom *entry = list_entry(queue->x_dep_head.next, - struct kbase_jd_atom, queue); + katom = kbase_jd_atom_from_id(kctx, id); - list_del(queue->x_dep_head.next); + rb->read_idx++; + rb->running_idx++; - callback(kctx->kbdev, entry); + callback(kctx->kbdev, katom); } } /** - * jsctx_queue_foreach(): - Execute callback for each entry in every queue - * @kctx: Pointer to kbase context with queue. + * jsctx_rb_foreach(): - Execute callback for each entry in all priority rb + * @kctx: Pointer to kbase context with ring buffer. * @js: Job slot id to iterate. * @callback: Function pointer to callback. * * Iterate over all the different priorities, and for each call - * jsctx_queue_foreach_prio() to iterate over the queue and invoke @callback - * for each entry, and remove the entry from the queue. + * jsctx_rb_foreach_prio() to iterate over the ring buffer and invoke @callback + * for each entry in buffer, and remove the entry from the buffer. 
*/ static inline void -jsctx_queue_foreach(struct kbase_context *kctx, int js, +jsctx_rb_foreach(struct kbase_context *kctx, int js, kbasep_js_policy_ctx_job_cb callback) { int prio; for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) - jsctx_queue_foreach_prio(kctx, js, prio, callback); + jsctx_rb_foreach_prio(kctx, js, prio, callback); } /** @@ -358,16 +436,16 @@ jsctx_queue_foreach(struct kbase_context *kctx, int js, static inline struct kbase_jd_atom * jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) { - struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; - struct rb_node *node; + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + int id; lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - node = rb_first(&rb->runnable_tree); - if (!node) + if (jsctx_rb_none_to_pull_prio(kctx, js, prio)) return NULL; - return rb_entry(node, struct kbase_jd_atom, runnable_tree_node); + id = rb->entries[rb->read_idx & JSCTX_RB_MASK].atom_id; + return kbase_jd_atom_from_id(kctx, id); } /** @@ -379,8 +457,6 @@ jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a * pointer to the next atom, unless all the priority's ring buffers are empty. * - * Caller must hold the runpool_irq.lock. - * * Return: Pointer to next atom in buffer, or NULL if there is no atom. */ static inline struct kbase_jd_atom * @@ -388,8 +464,6 @@ jsctx_rb_peek(struct kbase_context *kctx, int js) { int prio; - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { struct kbase_jd_atom *katom; @@ -401,6 +475,35 @@ jsctx_rb_peek(struct kbase_context *kctx, int js) return NULL; } +/** + * jsctx_rb_peek_last(): - Check a ring buffer and get the last atom + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to check. + * @prio: Priority id to check. + * + * Check the ring buffer for the specified @js and @prio and return a + * pointer to the last atom, unless all the priority's ring buffers are empty. + * + * The last atom is the atom that was added using jsctx_rb_add() most recently. + * + * Return: Pointer to last atom in buffer, or NULL if there is no atom. + */ +static inline struct kbase_jd_atom * +jsctx_rb_peek_last(struct kbase_context *kctx, int js, int prio) +{ + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + int id; + + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->jctx.lock); + + if (jsctx_rb_is_empty_prio(kctx, js, prio)) + return NULL; + + id = rb->entries[(rb->write_idx - 1) & JSCTX_RB_MASK].atom_id; + return kbase_jd_atom_from_id(kctx, id); +} + /** * jsctx_rb_pull(): - Mark atom in list as running * @kctx: Pointer to kbase context with ring buffer. @@ -415,71 +518,158 @@ jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { int prio = katom->sched_priority; int js = katom->slot_nr; - struct jsctx_queue *rb = &kctx->jsctx_queue[prio][js]; + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); /* Atoms must be pulled in the correct order. 
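	 * jsctx_rb_peek_prio() returns the atom at read_idx, so the caller
	 * must pull exactly that atom; each pull advances read_idx by a
	 * single entry.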
*/ WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio)); - rb_erase(&katom->runnable_tree_node, &rb->runnable_tree); + rb->read_idx++; } -#define LESS_THAN_WRAP(a, b) ((s32)(a - b) < 0) - -static void -jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom) +/** + * jsctx_rb_unpull(): - Undo marking of atom in list as running + * @kctx: Pointer to kbase context with ring buffer. + * @katom: Pointer to katom to unpull. + * + * Undo jsctx_rb_pull() and put @katom back in the queue. + * + * jsctx_rb_unpull() must be called on atoms in the same order the atoms were + * pulled. + */ +static inline void +jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) { int prio = katom->sched_priority; int js = katom->slot_nr; - struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; - struct rb_node **new = &(queue->runnable_tree.rb_node), *parent = NULL; + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - while (*new) { - struct kbase_jd_atom *entry = container_of(*new, - struct kbase_jd_atom, runnable_tree_node); + /* Atoms must be unpulled in correct order. */ + WARN_ON(rb->entries[(rb->read_idx - 1) & JSCTX_RB_MASK].atom_id != + kbase_jd_atom_id(kctx, katom)); - parent = *new; - if (LESS_THAN_WRAP(katom->age, entry->age)) - new = &((*new)->rb_left); - else - new = &((*new)->rb_right); - } + rb->read_idx--; +} - /* Add new node and rebalance tree. */ - rb_link_node(&katom->runnable_tree_node, parent, new); - rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree); +/** + * jsctx_rb_add(): - Add atom to ring buffer + * @kctx: Pointer to kbase context with ring buffer. + * @katom: Pointer to katom to add. + * + * Add @katom to the ring buffer determined by the atom's priority and job slot + * number. + * + * If the ring buffer is full -EBUSY will be returned. + * + * Return: On success 0 is returned, on failure a negative error code. + */ +static int +jsctx_rb_add_atom(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + int prio = katom->sched_priority; + int js = katom->slot_nr; + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + + lockdep_assert_held(&kctx->jctx.lock); + + /* Check if the ring buffer is full */ + if ((rb->write_idx - rb->running_idx) >= JSCTX_RB_SIZE) + return -EBUSY; + + rb->entries[rb->write_idx & JSCTX_RB_MASK].atom_id = + kbase_jd_atom_id(kctx, katom); + rb->write_idx++; + + return 0; } /** - * jsctx_rb_unpull(): - Undo marking of atom in list as running + * jsctx_rb_remove(): - Remove atom from ring buffer * @kctx: Pointer to kbase context with ring buffer. - * @katom: Pointer to katom to unpull. + * @katom: Pointer to katom to remove. * - * Undo jsctx_rb_pull() and put @katom back in the queue. + * Remove @katom from the ring buffer. * - * jsctx_rb_unpull() must be called on atoms in the same order the atoms were - * pulled. + * @katom must have been pulled from the buffer earlier by jsctx_rb_pull(), and + * atoms must be removed in the same order they were pulled from the ring + * buffer. */ static inline void -jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) +jsctx_rb_remove(struct kbase_context *kctx, struct kbase_jd_atom *katom) { + int prio = katom->sched_priority; + int js = katom->slot_nr; + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - jsctx_tree_add(kctx, katom); + /* Atoms must be completed in order. 
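+	 * Removal always retires the entry at running_idx, i.e. the oldest
+	 * atom that has been pulled but not yet completed.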
*/ + WARN_ON(rb->entries[rb->running_idx & JSCTX_RB_MASK].atom_id != + kbase_jd_atom_id(kctx, katom)); + + rb->running_idx++; } -static bool kbase_js_ctx_pullable(struct kbase_context *kctx, - int js, - bool is_scheduled); -static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js); -static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js); +/** + * jsctx_rb_evict(): - Evict atom, and dependents, from ring buffer + * @kctx: Pointer to kbase context with ring buffer. + * @start_katom: Pointer to the first katom to evict. + * @head_katom: Pointer to head katom. + * @evict_list: Pointer to head of list where evicted atoms are added. + * + * Iterate over the ring buffer starting at @start_katom and evict @start_atom + * and dependent atoms in ring buffer. + * + * @evict_list and @head_katom is passed on to kbase_js_evict_atom() which will + * examine the atom dependencies. + * + * jsctx_rb_evict() is only called by kbase_js_evict_deps(). + */ +static void +jsctx_rb_evict(struct kbase_context *kctx, + struct kbase_jd_atom *start_katom, + struct kbase_jd_atom *head_katom, + struct list_head *evict_list) +{ + int prio = start_katom->sched_priority; + int js = start_katom->slot_nr; + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + bool atom_in_rb = false; + u16 i, start_idx; + + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->jctx.lock); + + for (i = rb->running_idx; i != rb->write_idx; i++) { + if (rb->entries[i & JSCTX_RB_MASK].atom_id == + kbase_jd_atom_id(kctx, start_katom)) { + start_idx = i; + atom_in_rb = true; + break; + } + } + + /* start_katom must still be in ring buffer. */ + if (i == rb->write_idx || !atom_in_rb) + return; + + /* Evict all dependencies on same slot. 
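+	 * Entries already marked KBASEP_ATOM_ID_INVALID are skipped; evicted
+	 * atoms are collected on @evict_list, and the walk stops early if
+	 * kbase_js_evict_atom() returns false for an entry.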
*/ + for (i = start_idx; i != rb->write_idx; i++) { + u8 katom_evict; + + katom_evict = rb->entries[i & JSCTX_RB_MASK].atom_id; + if (katom_evict != KBASEP_ATOM_ID_INVALID) { + if (!kbase_js_evict_atom(kctx, + &kctx->jctx.atoms[katom_evict], + start_katom, head_katom, + evict_list, rb, i)) + break; + } + } +} /* * Functions private to KBase ('Protected' functions) @@ -552,7 +742,6 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES; jsdd->cfs_ctx_runtime_min_slices = DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES; - atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT); dev_dbg(kbdev->dev, "JS Config Attribs: "); dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u", @@ -579,8 +768,6 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) jsdd->cfs_ctx_runtime_init_slices); dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_min_slices:%u", jsdd->cfs_ctx_runtime_min_slices); - dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i", - atomic_read(&jsdd->soft_job_timeout_ms)); if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss && jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss && @@ -685,7 +872,7 @@ int kbasep_js_kctx_init(struct kbase_context * const kctx) struct kbase_device *kbdev; struct kbasep_js_kctx_info *js_kctx_info; int err; - int i, j; + int i; KBASE_DEBUG_ASSERT(kctx != NULL); @@ -725,13 +912,6 @@ int kbasep_js_kctx_init(struct kbase_context * const kctx) if (js_kctx_info->init_status != JS_KCTX_INIT_ALL) return -EINVAL; - for (i = 0; i < KBASE_JS_ATOM_SCHED_PRIO_COUNT; i++) { - for (j = 0; j < BASE_JM_MAX_NR_SLOTS; j++) { - INIT_LIST_HEAD(&kctx->jsctx_queue[i][j].x_dep_head); - kctx->jsctx_queue[i][j].runnable_tree = RB_ROOT; - } - } - return 0; } @@ -786,25 +966,30 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) } /** - * kbase_js_ctx_list_add_pullable_nolock - Variant of - * kbase_jd_ctx_list_add_pullable() - * where the caller must hold - * runpool_irq.lock + * kbase_js_ctx_list_add_pullable - Add context to the tail of the per-slot + * pullable context queue * @kbdev: Device pointer * @kctx: Context to add to queue * @js: Job slot to use * - * Caller must hold runpool_irq.lock + * If the context is on either the pullable or unpullable queues, then it is + * removed before being added to the tail. + * + * This function should be used when queueing a context for the first time, or + * re-queueing a context that has been pulled from. 
+ * + * Caller must hold kbasep_jd_device_data.queue_mutex * * Return: true if caller should call kbase_backend_ctx_count_changed() */ -static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, +static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev, struct kbase_context *kctx, int js) { bool ret = false; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->js_data.queue_mutex); + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); @@ -827,24 +1012,30 @@ static bool kbase_js_ctx_list_add_pullable_nolock(struct kbase_device *kbdev, } /** - * kbase_js_ctx_list_add_pullable_head_nolock - Variant of - * kbase_js_ctx_list_add_pullable_head() - * where the caller must hold - * runpool_irq.lock + * kbase_js_ctx_list_add_pullable_head - Add context to the head of the + * per-slot pullable context queue * @kbdev: Device pointer * @kctx: Context to add to queue * @js: Job slot to use * - * Caller must hold runpool_irq.lock + * If the context is on either the pullable or unpullable queues, then it is + * removed before being added to the head. + * + * This function should be used when a context has been scheduled, but no jobs + * can currently be pulled from it. + * + * Caller must hold kbasep_jd_device_data.queue_mutex * * Return: true if caller should call kbase_backend_ctx_count_changed() */ -static bool kbase_js_ctx_list_add_pullable_head_nolock( - struct kbase_device *kbdev, struct kbase_context *kctx, int js) +static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) { bool ret = false; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->js_data.queue_mutex); + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); @@ -867,37 +1058,8 @@ static bool kbase_js_ctx_list_add_pullable_head_nolock( } /** - * kbase_js_ctx_list_add_pullable_head - Add context to the head of the - * per-slot pullable context queue - * @kbdev: Device pointer - * @kctx: Context to add to queue - * @js: Job slot to use - * - * If the context is on either the pullable or unpullable queues, then it is - * removed before being added to the head. - * - * This function should be used when a context has been scheduled, but no jobs - * can currently be pulled from it. 
- * - * Return: true if caller should call kbase_backend_ctx_count_changed() - */ -static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, - struct kbase_context *kctx, - int js) -{ - bool ret; - unsigned long flags; - - spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); - ret = kbase_js_ctx_list_add_pullable_head_nolock(kbdev, kctx, js); - spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); - - return ret; -} - -/** - * kbase_js_ctx_list_add_unpullable_nolock - Add context to the tail of the - * per-slot unpullable context queue + * kbase_js_ctx_list_add_unpullable - Add context to the tail of the per-slot + * unpullable context queue * @kbdev: Device pointer * @kctx: Context to add to queue * @js: Job slot to use @@ -908,17 +1070,18 @@ static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, * This function should be used when a context has been pulled from, and there * are no jobs remaining on the specified slot. * - * Caller must hold runpool_irq.lock + * Caller must hold kbasep_jd_device_data.queue_mutex * * Return: true if caller should call kbase_backend_ctx_count_changed() */ -static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, +static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev, struct kbase_context *kctx, int js) { bool ret = false; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->js_data.queue_mutex); + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], &kbdev->js_data.ctx_list_unpullable[js]); @@ -938,8 +1101,8 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, } /** - * kbase_js_ctx_list_remove_nolock - Remove context from the per-slot pullable - * or unpullable context queues + * kbase_js_ctx_list_remove - Remove context from the per-slot pullable or + * unpullable context queues * @kbdev: Device pointer * @kctx: Context to remove from queue * @js: Job slot to use @@ -949,17 +1112,18 @@ static bool kbase_js_ctx_list_add_unpullable_nolock(struct kbase_device *kbdev, * This function should be used when a context has no jobs on the GPU, and no * jobs remaining for the specified slot. * - * Caller must hold runpool_irq.lock + * Caller must hold kbasep_jd_device_data.queue_mutex * * Return: true if caller should call kbase_backend_ctx_count_changed() */ -static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, +static bool kbase_js_ctx_list_remove(struct kbase_device *kbdev, struct kbase_context *kctx, int js) { bool ret = false; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->js_data.queue_mutex); + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])); @@ -980,24 +1144,23 @@ static bool kbase_js_ctx_list_remove_nolock(struct kbase_device *kbdev, } /** - * kbase_js_ctx_list_pop_head_nolock - Variant of kbase_js_ctx_list_pop_head() - * where the caller must hold - * runpool_irq.lock + * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable + * queue. * @kbdev: Device pointer * @js: Job slot to use * - * Caller must hold runpool_irq.lock + * Caller must hold kbasep_jd_device_data::queue_mutex * * Return: Context to use for specified slot. 
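The hunks above and below collapse the newer code's split between a *_nolock body and a thin wrapper that took runpool_irq.lock around it; this patch removes the wrappers, keeps single functions, and documents queue_mutex as the caller's responsibility instead. The split being undone is the usual "lock-free body plus locking wrapper" idiom; a compact userspace sketch of it, with a pthread mutex standing in for the spinlock and hypothetical names, is:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static int pullable_count;

/* Body: caller must already hold queue_lock (the kernel version would
 * state this with lockdep_assert_held()). */
static int pop_head_nolock(void)
{
    if (pullable_count == 0)
        return -1;              /* nothing to pull */
    return --pullable_count;
}

/* Wrapper: take the lock, call the _nolock body, release the lock. */
static int pop_head(void)
{
    int ret;

    pthread_mutex_lock(&queue_lock);
    ret = pop_head_nolock();
    pthread_mutex_unlock(&queue_lock);
    return ret;
}

int main(void)
{
    pullable_count = 2;
    printf("%d\n", pop_head());   /* 1 */
    printf("%d\n", pop_head());   /* 0 */
    printf("%d\n", pop_head());   /* -1: queue empty */
    return 0;
}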
* NULL if no contexts present for specified slot */ -static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( +static struct kbase_context *kbase_js_ctx_list_pop_head( struct kbase_device *kbdev, int js) { struct kbase_context *kctx; - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kbdev->js_data.queue_mutex); if (list_empty(&kbdev->js_data.ctx_list_pullable[js])) return NULL; @@ -1011,28 +1174,6 @@ static struct kbase_context *kbase_js_ctx_list_pop_head_nolock( return kctx; } -/** - * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable - * queue. - * @kbdev: Device pointer - * @js: Job slot to use - * - * Return: Context to use for specified slot. - * NULL if no contexts present for specified slot - */ -static struct kbase_context *kbase_js_ctx_list_pop_head( - struct kbase_device *kbdev, int js) -{ - struct kbase_context *kctx; - unsigned long flags; - - spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); - kctx = kbase_js_ctx_list_pop_head_nolock(kbdev, js); - spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); - - return kctx; -} - /** * kbase_js_ctx_pullable - Return if a context can be pulled from on the * specified slot @@ -1066,8 +1207,7 @@ static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, return false; /* next atom blocked */ if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { if (katom->x_pre_dep->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || - katom->x_pre_dep->will_fail_event_code) + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) return false; if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) @@ -1096,7 +1236,7 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, /* Dependent atom must already have been submitted */ if (!(dep_atom->atom_flags & - KBASE_KATOM_FLAG_JSCTX_IN_TREE)) { + KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED)) { ret = false; break; } @@ -1115,12 +1255,6 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, ret = false; break; } - /* Each dependee atom can only have one - * same-slot dependency */ - if (dep_atom->post_dep) { - ret = false; - break; - } has_dep = true; } else { /* Only one cross-slot dependency can be @@ -1160,6 +1294,21 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, has_x_dep = true; } + if (kbase_jd_katom_dep_type(&katom->dep[i]) == + BASE_JD_DEP_TYPE_DATA && + js == dep_js) { + struct kbase_jd_atom *last_atom = + jsctx_rb_peek_last(kctx, js, + prio); + + /* Last atom on slot must be pre-dep for this + * atom */ + if (last_atom != dep_atom) { + ret = false; + break; + } + } + /* Dependency can be represented in ringbuffers */ } } @@ -1193,10 +1342,9 @@ static bool kbase_js_dep_validate(struct kbase_context *kctx, } if ((kbase_jd_katom_dep_type(&katom->dep[i]) == BASE_JD_DEP_TYPE_DATA) && - (js == dep_js)) { - katom->pre_dep = dep_atom; - dep_atom->post_dep = katom; - } + (js == dep_js)) + katom->atom_flags |= + KBASE_KATOM_FLAG_FAIL_PREV; list_del(&katom->dep_item[i]); kbase_jd_katom_dep_clear(&katom->dep[i]); @@ -1262,7 +1410,17 @@ bool kbasep_js_add_job(struct kbase_context *kctx, KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom)); - enqueue_required = kbase_js_dep_resolved_submit(kctx, atom); + if (kbase_js_dep_resolved_submit(kctx, atom, &enqueue_required) != 0) { + /* Ringbuffer was full (should be impossible) - fail the job */ + --(js_kctx_info->ctx.nr_jobs); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + 
mutex_unlock(&js_devdata->runpool_mutex); + + atom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + + goto out_unlock; + } KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc, kbasep_js_trace_get_refcnt_nolock(kbdev, kctx)); @@ -1272,11 +1430,11 @@ bool kbasep_js_add_job(struct kbase_context *kctx, if (enqueue_required) { if (kbase_js_ctx_pullable(kctx, atom->slot_nr, false)) - timer_sync = kbase_js_ctx_list_add_pullable_nolock( - kbdev, kctx, atom->slot_nr); + timer_sync = kbase_js_ctx_list_add_pullable(kbdev, kctx, + atom->slot_nr); else - timer_sync = kbase_js_ctx_list_add_unpullable_nolock( - kbdev, kctx, atom->slot_nr); + timer_sync = kbase_js_ctx_list_add_unpullable(kbdev, + kctx, atom->slot_nr); } /* If this context is active and the atom is the first on its slot, * kick the job manager to attempt to fast-start the atom */ @@ -1604,7 +1762,9 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( #if defined(CONFIG_MALI_GATOR_SUPPORT) kbase_trace_mali_mmu_as_released(kctx->as_nr); #endif +#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_nret_as_ctx(&kbdev->as[kctx->as_nr], kctx); +#endif kbase_backend_release_ctx_irq(kbdev, kctx); @@ -1657,8 +1817,7 @@ static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( wake_up(&js_kctx_info->ctx.is_scheduled_wait); /* Queue an action to occur after we've dropped the lock */ - release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED | - KBASEP_JS_RELEASE_RESULT_SCHED_ALL; + release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED; } else { kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx, katom_retained_state, runpool_ctx_attr_change); @@ -1716,12 +1875,14 @@ void kbasep_js_runpool_release_ctx_and_katom_retained_state( { struct kbasep_js_device_data *js_devdata; struct kbasep_js_kctx_info *js_kctx_info; + base_jd_event_code event_code; kbasep_js_release_result release_result; KBASE_DEBUG_ASSERT(kbdev != NULL); KBASE_DEBUG_ASSERT(kctx != NULL); js_kctx_info = &kctx->jctx.sched_info; js_devdata = &kbdev->js_data; + event_code = katom_retained_state->event_code; mutex_lock(&js_devdata->queue_mutex); mutex_lock(&js_kctx_info->ctx.jsctx_mutex); @@ -1793,11 +1954,77 @@ static void kbasep_js_runpool_release_ctx_no_schedule( */ } -void kbase_js_set_timeouts(struct kbase_device *kbdev) +/** + * kbase_js_set_timeouts - update all JS timeouts with user specified data + * @kbdev: Device pointer + * + * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is + * set to a positive number then that becomes the new value used, if a timeout + * is negative then the default is set. 
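The kernel-doc above states the js_timeouts sysfs convention that the function body which follows implements: a negative stored value restores the built-in default, a positive value becomes the new setting, and zero (as the body is written) leaves the current value untouched. A self-contained sketch of that latch rule, with a hypothetical field name and default value rather than the driver's:

#include <stdio.h>

#define DEFAULT_SOFT_STOP_TICKS 1u   /* illustrative default, not the driver's */

/* Latch one sysfs override: <0 restores the default, >0 overrides,
 * 0 keeps whatever is currently in effect. */
static void latch_timeout(long user_value, unsigned int *current_ticks)
{
    if (user_value < 0)
        *current_ticks = DEFAULT_SOFT_STOP_TICKS;
    else if (user_value > 0)
        *current_ticks = (unsigned int)user_value;
}

int main(void)
{
    unsigned int soft_stop_ticks = DEFAULT_SOFT_STOP_TICKS;

    latch_timeout(50, &soft_stop_ticks);   /* override: now 50 */
    latch_timeout(0, &soft_stop_ticks);    /* unchanged: still 50 */
    latch_timeout(-1, &soft_stop_ticks);   /* back to the default */
    printf("%u\n", soft_stop_ticks);
    return 0;
}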
+ */ +static void kbase_js_set_timeouts(struct kbase_device *kbdev) { - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + struct kbasep_js_device_data *js_data = &kbdev->js_data; + + if (kbdev->js_scheduling_period_ns < 0) + js_data->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS; + else if (kbdev->js_scheduling_period_ns > 0) + js_data->scheduling_period_ns = kbdev->js_scheduling_period_ns; + + if (kbdev->js_soft_stop_ticks < 0) + js_data->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS; + else if (kbdev->js_soft_stop_ticks > 0) + js_data->soft_stop_ticks = kbdev->js_soft_stop_ticks; + + if (kbdev->js_soft_stop_ticks_cl < 0) + js_data->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL; + else if (kbdev->js_soft_stop_ticks_cl > 0) + js_data->soft_stop_ticks_cl = kbdev->js_soft_stop_ticks_cl; + + if (kbdev->js_hard_stop_ticks_ss < 0) { + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) + js_data->hard_stop_ticks_ss = + DEFAULT_JS_HARD_STOP_TICKS_SS_8408; + else + js_data->hard_stop_ticks_ss = + DEFAULT_JS_HARD_STOP_TICKS_SS; + } else if (kbdev->js_hard_stop_ticks_ss > 0) { + js_data->hard_stop_ticks_ss = kbdev->js_hard_stop_ticks_ss; + } + + if (kbdev->js_hard_stop_ticks_cl < 0) + js_data->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL; + else if (kbdev->js_hard_stop_ticks_cl > 0) + js_data->hard_stop_ticks_cl = kbdev->js_hard_stop_ticks_cl; + + if (kbdev->js_hard_stop_ticks_dumping < 0) + js_data->hard_stop_ticks_dumping = + DEFAULT_JS_HARD_STOP_TICKS_DUMPING; + else if (kbdev->js_hard_stop_ticks_dumping > 0) + js_data->hard_stop_ticks_dumping = + kbdev->js_hard_stop_ticks_dumping; + + if (kbdev->js_reset_ticks_ss < 0) { + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) + js_data->gpu_reset_ticks_ss = + DEFAULT_JS_RESET_TICKS_SS_8408; + else + js_data->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS; + } else if (kbdev->js_reset_ticks_ss > 0) { + js_data->gpu_reset_ticks_ss = kbdev->js_reset_ticks_ss; + } + + if (kbdev->js_reset_ticks_cl < 0) + js_data->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL; + else if (kbdev->js_reset_ticks_cl > 0) + js_data->gpu_reset_ticks_cl = kbdev->js_reset_ticks_cl; - kbase_backend_timeouts_changed(kbdev); + if (kbdev->js_reset_ticks_dumping < 0) + js_data->gpu_reset_ticks_dumping = + DEFAULT_JS_RESET_TICKS_DUMPING; + else if (kbdev->js_reset_ticks_dumping > 0) + js_data->gpu_reset_ticks_dumping = + kbdev->js_reset_ticks_dumping; } static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, @@ -1844,6 +2071,16 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, 0u, kbasep_js_trace_get_refcnt(kbdev, kctx)); + if (js_devdata->nr_user_contexts_running == 0 && + kbdev->js_timeouts_updated) { + /* Only when there are no other contexts submitting jobs: + * Latch in run-time job scheduler timeouts that were set + * through js_timeouts sysfs file */ + kbase_js_set_timeouts(kbdev); + + kbdev->js_timeouts_updated = false; + } + js_kctx_info->ctx.is_scheduled = true; mutex_lock(&new_address_space->transaction_mutex); @@ -1853,10 +2090,13 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) { spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); mutex_unlock(&new_address_space->transaction_mutex); - /* Roll back the transaction so far and return */ - js_kctx_info->ctx.is_scheduled = false; + /* If address space is not pending, then kbase_backend_use_ctx() + * failed. 
Roll back the transaction so far and return */ + if (!kctx->as_pending) { + js_kctx_info->ctx.is_scheduled = false; - kbase_backend_release_free_address_space(kbdev, as_nr); + kbase_backend_release_free_address_space(kbdev, as_nr); + } mutex_unlock(&js_devdata->runpool_mutex); @@ -1869,7 +2109,9 @@ static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, #if defined(CONFIG_MALI_GATOR_SUPPORT) kbase_trace_mali_mmu_as_in_use(kctx->as_nr); #endif +#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_tl_ret_as_ctx(&kbdev->as[kctx->as_nr], kctx); +#endif /* Cause any future waiter-on-termination to wait until the context is * descheduled */ @@ -1927,6 +2169,11 @@ static bool kbase_js_use_ctx(struct kbase_device *kbdev, unsigned long flags; spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + if (kctx->as_pending) { + /* Context waiting for AS to be assigned */ + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + return false; + } if (kbase_backend_use_ctx_sched(kbdev, kctx)) { /* Context already has ASID - mark as active */ kbdev->hwaccess.active_kctx = kctx; @@ -1965,7 +2212,7 @@ void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, is_scheduled = js_kctx_info->ctx.is_scheduled; if (!is_scheduled) { /* Add the context to the pullable list */ - if (kbase_js_ctx_list_add_pullable_head(kbdev, kctx, 0)) + if (kbase_js_ctx_list_add_pullable(kbdev, kctx, 0)) kbase_js_sync_timers(kbdev); /* Fast-starting requires the jsctx_mutex to be dropped, @@ -1993,6 +2240,7 @@ void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) { struct kbasep_js_kctx_info *js_kctx_info; + bool pending; KBASE_DEBUG_ASSERT(kctx != NULL); js_kctx_info = &kctx->jctx.sched_info; @@ -2000,10 +2248,13 @@ void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, /* We don't need to use the address space anymore */ mutex_lock(&js_kctx_info->ctx.jsctx_mutex); js_kctx_info->ctx.flags &= (~KBASE_CTX_FLAG_PRIVILEGED); + pending = kctx->as_pending; mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); - /* Release the context - it will be scheduled out */ - kbasep_js_runpool_release_ctx(kbdev, kctx); + /* Release the context - it will be scheduled out if there is no + * pending job */ + if (!pending) + kbasep_js_runpool_release_ctx(kbdev, kctx); kbase_js_sched_all(kbdev); } @@ -2042,10 +2293,8 @@ void kbasep_js_suspend(struct kbase_device *kbdev) * the instrumented context. 
It'll be suspended by * disabling instrumentation */ if (kctx->jctx.sched_info.ctx.flags & - KBASE_CTX_FLAG_PRIVILEGED) { - ++nr_privileged_ctx; - WARN_ON(nr_privileged_ctx != 1); - } + KBASE_CTX_FLAG_PRIVILEGED) + KBASE_DEBUG_ASSERT(++nr_privileged_ctx == 1); } } CSTD_UNUSED(nr_privileged_ctx); @@ -2096,8 +2345,7 @@ void kbasep_js_resume(struct kbase_device *kbdev) if (!js_kctx_info->ctx.is_scheduled && kbase_js_ctx_pullable(kctx, js, false)) - timer_sync = - kbase_js_ctx_list_add_pullable_nolock( + timer_sync = kbase_js_ctx_list_add_pullable( kbdev, kctx, js); spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, @@ -2149,118 +2397,37 @@ static int kbase_js_get_slot(struct kbase_device *kbdev, return 1; } -bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, - struct kbase_jd_atom *katom) +int kbase_js_dep_resolved_submit(struct kbase_context *kctx, + struct kbase_jd_atom *katom, + bool *enqueue_required) { - bool enqueue_required; - katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom); lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - lockdep_assert_held(&kctx->jctx.lock); /* If slot will transition from unpullable to pullable then add to * pullable list */ if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) { - enqueue_required = true; + *enqueue_required = true; } else { - enqueue_required = false; + *enqueue_required = false; } /* Check if there are lower priority jobs to soft stop */ kbase_job_slot_ctx_priority_check_locked(kctx, katom); - if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) || - (katom->pre_dep && (katom->pre_dep->atom_flags & - KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { - int prio = katom->sched_priority; - int js = katom->slot_nr; - struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js]; - - list_add_tail(&katom->queue, &queue->x_dep_head); - katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; - enqueue_required = false; - } else { - /* Add atom to ring buffer. */ - jsctx_tree_add(kctx, katom); - katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; - } - - return enqueue_required; -} - -/** - * kbase_js_move_to_tree - Move atom (and any dependent atoms) to the - * runnable_tree, ready for execution - * @katom: Atom to submit - * - * It is assumed that @katom does not have KBASE_KATOM_FLAG_X_DEP_BLOCKED set, - * but is still present in the x_dep list. If @katom has a same-slot dependent - * atom then that atom (and any dependents) will also be moved. - */ -static void kbase_js_move_to_tree(struct kbase_jd_atom *katom) -{ - lockdep_assert_held(&katom->kctx->kbdev->js_data.runpool_irq.lock); - - while (katom) { - WARN_ON(!(katom->atom_flags & - KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)); - - if (!(katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { - list_del(&katom->queue); - katom->atom_flags &= - ~KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST; - jsctx_tree_add(katom->kctx, katom); - katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_IN_TREE; - } else { - break; - } - - katom = katom->post_dep; - } -} - - -/** - * kbase_js_evict_deps - Evict dependencies of a failed atom. - * @kctx: Context pointer - * @katom: Pointer to the atom that has failed. - * @js: The job slot the katom was run on. - * @prio: Priority of the katom. - * - * Remove all post dependencies of an atom from the context ringbuffers. - * - * The original atom's event_code will be propogated to all dependent atoms. 
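The kernel-doc being removed above describes the forward half of a failure: when an atom fails, its event_code is copied onto the atoms that depended on it before they are taken out of the queues. A minimal model of that propagation step, using a plain array of hypothetical dependents instead of the driver's ringbuffers:

#include <stdio.h>

enum event_code { EVENT_DONE = 0, EVENT_JOB_CANCELLED = 1 };

struct atom {
    enum event_code event_code;
    int completed;
};

/* Copy a failed atom's event code onto its not-yet-completed dependents. */
static void evict_deps(const struct atom *failed, struct atom *deps, int n)
{
    int i;

    for (i = 0; i < n; i++) {
        if (deps[i].completed)
            continue;
        deps[i].event_code = failed->event_code;
    }
}

int main(void)
{
    struct atom head = { EVENT_JOB_CANCELLED, 1 };
    struct atom deps[2] = { { EVENT_DONE, 0 }, { EVENT_DONE, 0 } };

    evict_deps(&head, deps, 2);
    printf("%d %d\n", deps[0].event_code, deps[1].event_code);  /* 1 1 */
    return 0;
}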
- * - * Context: Caller must hold the HW access lock - */ -static void kbase_js_evict_deps(struct kbase_context *kctx, - struct kbase_jd_atom *katom, int js, int prio) -{ - struct kbase_jd_atom *x_dep = katom->x_post_dep; - struct kbase_jd_atom *next_katom = katom->post_dep; - - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - - if (next_katom) { - KBASE_DEBUG_ASSERT(next_katom->status != - KBASE_JD_ATOM_STATE_HW_COMPLETED); - next_katom->will_fail_event_code = katom->event_code; - + /* Add atom to ring buffer. */ + if (unlikely(jsctx_rb_add_atom(kctx, katom))) { + /* The ring buffer is full. This should be impossible as the + * job dispatcher can not submit enough atoms to exceed the + * ring buffer size. Fail the job. + */ + WARN(1, "Job submit while JSCTX ringbuffer already full\n"); + return -EINVAL; } - /* Has cross slot depenency. */ - if (x_dep && (x_dep->atom_flags & (KBASE_KATOM_FLAG_JSCTX_IN_TREE | - KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST))) { - /* Remove dependency.*/ - x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; + katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED; - /* Fail if it had a data dependency. */ - if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) { - x_dep->will_fail_event_code = katom->event_code; - } - if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST) - kbase_js_move_to_tree(x_dep); - } + return 0; } struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) @@ -2289,7 +2456,8 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) /* Due to ordering restrictions when unpulling atoms on failure, we do * not allow multiple runs of fail-dep atoms from the same context to be * present on the same slot */ - if (katom->pre_dep && atomic_read(&kctx->atoms_pulled_slot[js])) { + if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV) && + atomic_read(&kctx->atoms_pulled_slot[js])) { struct kbase_jd_atom *prev_atom = kbase_backend_inspect_tail(kctx->kbdev, js); @@ -2299,8 +2467,7 @@ struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { if (katom->x_pre_dep->gpu_rb_state == - KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB || - katom->x_pre_dep->will_fail_event_code) + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) return NULL; if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) @@ -2343,8 +2510,6 @@ static void js_return_worker(struct work_struct *data) u64 affinity = katom->affinity; enum kbase_atom_coreref_state coreref_state = katom->coreref_state; - kbase_tlstream_aux_job_softstop_ex(katom); - kbase_backend_complete_wq(kbdev, katom); if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) @@ -2364,14 +2529,13 @@ static void js_return_worker(struct work_struct *data) if (!atomic_read(&kctx->atoms_pulled_slot[js]) && jsctx_rb_none_to_pull(kctx, js)) - timer_sync |= kbase_js_ctx_list_remove_nolock(kbdev, kctx, js); + timer_sync |= kbase_js_ctx_list_remove(kbdev, kctx, js); if (!atomic_read(&kctx->atoms_pulled)) { if (!kctx->slots_pullable) { WARN_ON(!kctx->ctx_runnable_ref); kctx->ctx_runnable_ref = false; atomic_dec(&kbdev->js_data.nr_contexts_runnable); - timer_sync = true; } if (kctx->as_nr != KBASEP_AS_NR_INVALID && @@ -2385,7 +2549,7 @@ static void js_return_worker(struct work_struct *data) for (slot = 0; slot < num_slots; slot++) { if (kbase_js_ctx_pullable(kctx, slot, true)) timer_sync |= - kbase_js_ctx_list_add_pullable_nolock( + kbase_js_ctx_list_add_pullable( kbdev, kctx, slot); } } @@ -2437,6 
+2601,112 @@ void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) queue_work(kctx->jctx.job_done_wq, &katom->work); } +static bool kbase_js_evict_atom(struct kbase_context *kctx, + struct kbase_jd_atom *katom_evict, + struct kbase_jd_atom *start_katom, + struct kbase_jd_atom *head_katom, + struct list_head *evict_list, + struct jsctx_rb *rb, int idx) +{ + struct kbase_jd_atom *x_dep = katom_evict->x_post_dep; + + if (!(katom_evict->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV) && + katom_evict != start_katom) + return false; + + if (katom_evict->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + WARN_ON(katom_evict->event_code != head_katom->event_code); + + return false; + } + + if (katom_evict->status == KBASE_JD_ATOM_STATE_HW_COMPLETED && + katom_evict != head_katom) + return false; + + /* Evict cross dependency if present */ + if (x_dep && (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) + && (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) + list_add_tail(&x_dep->dep_item[0], evict_list); + + /* If cross dependency is present and does not have a data dependency + * then unblock */ + if (x_dep && (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) + && !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) + x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; + + if (katom_evict != head_katom) { + rb->entries[idx & JSCTX_RB_MASK].atom_id = + KBASEP_ATOM_ID_INVALID; + + katom_evict->event_code = head_katom->event_code; + katom_evict->atom_flags &= + ~KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED; + + if (katom_evict->atom_flags & KBASE_KATOM_FLAG_HOLDING_CTX_REF) + kbase_jd_done(katom_evict, katom_evict->slot_nr, NULL, + 0); + else + kbase_jd_evict(kctx->kbdev, katom_evict); + } + + return true; +} + +/** + * kbase_js_evict_deps - Evict dependencies + * @kctx: Context pointer + * @head_katom: Pointer to the atom to evict + * + * Remove all post dependencies of an atom from the context ringbuffers. + * + * The original atom's event_code will be propogated to all dependent atoms. 
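kbase_js_evict_atom() above works on the older JSCTX ringbuffer layout this patch restores: indices run freely and are reduced with idx & JSCTX_RB_MASK on access, and evicted slots are blanked with KBASEP_ATOM_ID_INVALID so a later compaction pass can squeeze them out. A compilable sketch of that indexing and blanking scheme; the buffer size, the read side, and the entry type are illustrative, not the driver's constants:

#include <stdio.h>

#define RB_SIZE 8u                     /* must be a power of two */
#define RB_MASK (RB_SIZE - 1u)
#define ATOM_ID_INVALID (-1)

struct jsctx_rb {
    int entries[RB_SIZE];
    unsigned int read_idx;             /* free-running, masked on access */
    unsigned int write_idx;
};

static int rb_add(struct jsctx_rb *rb, int atom_id)
{
    if (rb->write_idx - rb->read_idx >= RB_SIZE)
        return -1;                     /* full */
    rb->entries[rb->write_idx++ & RB_MASK] = atom_id;
    return 0;
}

/* Blank every entry from start_idx up to (but not including) write_idx,
 * mirroring how eviction marks slots invalid for later compaction. */
static void rb_evict_from(struct jsctx_rb *rb, unsigned int start_idx)
{
    unsigned int i;

    for (i = start_idx; i != rb->write_idx; i++)
        rb->entries[i & RB_MASK] = ATOM_ID_INVALID;
}

int main(void)
{
    struct jsctx_rb rb = { {0}, 0, 0 };
    unsigned int i;

    for (i = 0; i < 4; i++)
        rb_add(&rb, (int)i + 10);
    rb_evict_from(&rb, rb.read_idx + 2);     /* blank the last two entries */

    for (i = rb.read_idx; i != rb.write_idx; i++)
        printf("%d ", rb.entries[i & RB_MASK]);
    printf("\n");                            /* 10 11 -1 -1 */
    return 0;
}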
+ * + * Context: Caller must hold both jctx and HW access locks + */ +static void kbase_js_evict_deps(struct kbase_context *kctx, + struct kbase_jd_atom *head_katom) +{ + struct list_head evict_list; + + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + + INIT_LIST_HEAD(&evict_list); + + list_add_tail(&head_katom->dep_item[0], &evict_list); + + while (!list_empty(&evict_list)) { + struct kbase_jd_atom *start_katom; + + start_katom = list_entry(evict_list.prev, struct kbase_jd_atom, + dep_item[0]); + list_del(evict_list.prev); + + jsctx_rb_evict(kctx, start_katom, head_katom, &evict_list); + } +} + +/** + * kbase_js_compact - Compact JSCTX ringbuffers + * @kctx: Context pointer + * + * Compact the JSCTX ringbuffers, removing any NULL entries + * + * Context: Caller must hold both jctx and HW access locks + */ +static void kbase_js_compact(struct kbase_context *kctx) +{ + struct kbase_device *kbdev = kctx->kbdev; + int js; + + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) + jsctx_rb_compact(kctx, js); +} + bool kbase_js_complete_atom_wq(struct kbase_context *kctx, struct kbase_jd_atom *katom) { @@ -2459,7 +2729,12 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, mutex_lock(&js_devdata->runpool_mutex); spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); - if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE) { + if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) { + if (katom->event_code != BASE_JD_EVENT_DONE) + kbase_js_evict_deps(kctx, katom); + + jsctx_rb_remove(kctx, katom); + context_idle = !atomic_dec_return(&kctx->atoms_pulled); atomic_dec(&kctx->atoms_pulled_slot[atom_slot]); @@ -2468,18 +2743,16 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, WARN_ON(!kctx->ctx_runnable_ref); kctx->ctx_runnable_ref = false; atomic_dec(&kbdev->js_data.nr_contexts_runnable); - timer_sync = true; } + + if (katom->event_code != BASE_JD_EVENT_DONE) + kbase_js_compact(kctx); } - WARN_ON(!(katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_TREE)); if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) && - jsctx_rb_none_to_pull(kctx, atom_slot)) { - if (!list_empty( - &kctx->jctx.sched_info.ctx.ctx_list_entry[atom_slot])) - timer_sync |= kbase_js_ctx_list_remove_nolock( - kctx->kbdev, kctx, atom_slot); - } + jsctx_rb_none_to_pull(kctx, atom_slot)) + timer_sync |= kbase_js_ctx_list_remove(kctx->kbdev, kctx, + atom_slot); /* * If submission is disabled on this context (most likely due to an @@ -2495,8 +2768,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { if (kbase_js_ctx_pullable(kctx, js, true)) - timer_sync |= - kbase_js_ctx_list_add_pullable_nolock( + timer_sync |= kbase_js_ctx_list_add_pullable( kbdev, kctx, js); } } else if (katom->x_post_dep && @@ -2505,8 +2777,7 @@ bool kbase_js_complete_atom_wq(struct kbase_context *kctx, for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { if (kbase_js_ctx_pullable(kctx, js, true)) - timer_sync |= - kbase_js_ctx_list_add_pullable_nolock( + timer_sync |= kbase_js_ctx_list_add_pullable( kbdev, kctx, js); } } @@ -2531,29 +2802,32 @@ void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) struct kbase_device *kbdev; struct kbase_context *kctx = katom->kctx; union kbasep_js_policy *js_policy; - struct kbase_jd_atom *x_dep = 
katom->x_post_dep; + struct kbasep_js_device_data *js_devdata; kbdev = kctx->kbdev; js_policy = &kbdev->js_data.policy; + js_devdata = &kbdev->js_data; lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - if (katom->will_fail_event_code) - katom->event_code = katom->will_fail_event_code; - katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED; - if (katom->event_code != BASE_JD_EVENT_DONE) { - kbase_js_evict_deps(kctx, katom, katom->slot_nr, - katom->sched_priority); - } - #if defined(CONFIG_MALI_GATOR_SUPPORT) kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP, katom->slot_nr), NULL, 0); #endif - +#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_tl_nret_atom_lpu( + katom, + &kbdev->gpu_props.props.raw_props.js_features[ + katom->slot_nr]); + kbase_tlstream_tl_nret_atom_as(katom, &kbdev->as[kctx->as_nr]); + kbase_tlstream_tl_nret_ctx_lpu( + kctx, + &kbdev->gpu_props.props.raw_props.js_features[ + katom->slot_nr]); +#endif /* Calculate the job's time used */ if (end_timestamp != NULL) { /* Only calculating it for jobs that really run on the HW (e.g. @@ -2577,26 +2851,21 @@ void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0); /* Unblock cross dependency if present */ - if (x_dep && (katom->event_code == BASE_JD_EVENT_DONE || - !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) && - (x_dep->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED)) { - bool was_pullable = kbase_js_ctx_pullable(kctx, x_dep->slot_nr, - false); - x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; - kbase_js_move_to_tree(x_dep); - if (!was_pullable && kbase_js_ctx_pullable(kctx, x_dep->slot_nr, - false)) - kbase_js_ctx_list_add_pullable_nolock(kbdev, kctx, - x_dep->slot_nr); - } + if (katom->x_post_dep && (katom->event_code == BASE_JD_EVENT_DONE || + !(katom->x_post_dep->atom_flags & + KBASE_KATOM_FLAG_FAIL_BLOCKER))) + katom->x_post_dep->atom_flags &= + ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; } void kbase_js_sched(struct kbase_device *kbdev, int js_mask) { struct kbasep_js_device_data *js_devdata; + union kbasep_js_policy *js_policy; bool timer_sync = false; js_devdata = &kbdev->js_data; + js_policy = &js_devdata->policy; down(&js_devdata->schedule_sem); mutex_lock(&js_devdata->queue_mutex); @@ -2646,15 +2915,16 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) /* Context can not be used at this time */ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); - if (kbase_js_ctx_pullable(kctx, js, false) + if (kctx->as_pending || + kbase_js_ctx_pullable(kctx, js, false) || (kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_PRIVILEGED)) timer_sync |= - kbase_js_ctx_list_add_pullable_head_nolock( + kbase_js_ctx_list_add_pullable_head( kctx->kbdev, kctx, js); else timer_sync |= - kbase_js_ctx_list_add_unpullable_nolock( + kbase_js_ctx_list_add_unpullable( kctx->kbdev, kctx, js); spin_unlock_irqrestore( &js_devdata->runpool_irq.lock, flags); @@ -2683,12 +2953,12 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) /* Failed to pull jobs - push to head of list */ if (kbase_js_ctx_pullable(kctx, js, true)) timer_sync |= - kbase_js_ctx_list_add_pullable_head_nolock( + kbase_js_ctx_list_add_pullable_head( kctx->kbdev, kctx, js); else timer_sync |= - kbase_js_ctx_list_add_unpullable_nolock( + kbase_js_ctx_list_add_unpullable( kctx->kbdev, kctx, js); @@ -2714,12 +2984,10 @@ void kbase_js_sched(struct kbase_device *kbdev, int js_mask) /* Push to back of list */ if (kbase_js_ctx_pullable(kctx, js, 
true)) - timer_sync |= - kbase_js_ctx_list_add_pullable_nolock( + timer_sync |= kbase_js_ctx_list_add_pullable( kctx->kbdev, kctx, js); else - timer_sync |= - kbase_js_ctx_list_add_unpullable_nolock( + timer_sync |= kbase_js_ctx_list_add_unpullable( kctx->kbdev, kctx, js); spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); @@ -2750,7 +3018,6 @@ void kbase_js_zap_context(struct kbase_context *kctx) /* First, atomically do the following: * - mark the context as dying * - try to evict it from the policy queue */ - mutex_lock(&kctx->jctx.lock); mutex_lock(&js_devdata->queue_mutex); mutex_lock(&js_kctx_info->ctx.jsctx_mutex); js_kctx_info->ctx.is_dying = true; @@ -2826,7 +3093,6 @@ void kbase_js_zap_context(struct kbase_context *kctx) mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); - mutex_unlock(&kctx->jctx.lock); } else { unsigned long flags; bool was_retained; @@ -2862,7 +3128,6 @@ void kbase_js_zap_context(struct kbase_context *kctx) spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); mutex_unlock(&js_devdata->queue_mutex); - mutex_unlock(&kctx->jctx.lock); dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)", kctx); @@ -2929,6 +3194,7 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, u32 js; kbdev = kctx->kbdev; + js_devdata = &kbdev->js_data; spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); @@ -2938,7 +3204,7 @@ static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, /* Invoke callback on jobs on each slot in turn */ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) - jsctx_queue_foreach(kctx, js, callback); + jsctx_rb_foreach(kctx, js, callback); spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); } diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.h b/drivers/gpu/arm/midgard/mali_kbase_js.h index 66b2132930160..868c6808d628c 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js.h +++ b/drivers/gpu/arm/midgard/mali_kbase_js.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -491,22 +491,15 @@ void kbasep_js_resume(struct kbase_device *kbdev); * @param[in] kctx Context pointer * @param[in] atom Pointer to the atom to submit * - * @return Whether the context requires to be enqueued. */ -bool kbase_js_dep_resolved_submit(struct kbase_context *kctx, - struct kbase_jd_atom *katom); + * @return 0 if submit succeeded + * error code if the atom can not be submitted at this + * time, due to insufficient space in the ringbuffer, or dependencies + * that can not be represented. + */ +int kbase_js_dep_resolved_submit(struct kbase_context *kctx, + struct kbase_jd_atom *katom, + bool *enqueue_required); -/** - * jsctx_ll_flush_to_rb() - Pushes atoms from the linked list to ringbuffer. - * @kctx: Context Pointer - * @prio: Priority (specifies the queue together with js). - * @js: Job slot (specifies the queue together with prio). - * - * Pushes all possible atoms from the linked list to the ringbuffer. - * Number of atoms are limited to free space in the ringbuffer and - * number of available atoms in the linked list. 
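With the prototype change documented above, kbase_js_dep_resolved_submit() reports success or failure through its int return value and hands the "does the context need enqueueing" answer back through a bool out-parameter, which is how the reworked kbasep_js_add_job() earlier in this patch consumes it. A hedged sketch of that calling convention with generic names, not the driver's structures:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* 0 on success; -EINVAL when the ringbuffer has no room for the atom. */
static int dep_resolved_submit(int free_slots, bool *enqueue_required)
{
    if (free_slots == 0)
        return -EINVAL;
    *enqueue_required = (free_slots == 1);   /* illustrative rule only */
    return 0;
}

int main(void)
{
    bool enqueue_required = false;

    if (dep_resolved_submit(1, &enqueue_required) != 0) {
        /* mirror of the caller's failure path: cancel the job */
        printf("submit failed, job cancelled\n");
        return 1;
    }
    printf("enqueue required: %d\n", enqueue_required);
    return 0;
}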
- * - */ -void jsctx_ll_flush_to_rb(struct kbase_context *kctx, int prio, int js); /** * @brief Pull an atom from a context in the job scheduler for execution. * @@ -608,16 +601,6 @@ void kbase_js_zap_context(struct kbase_context *kctx); bool kbase_js_is_atom_valid(struct kbase_device *kbdev, struct kbase_jd_atom *katom); -/** - * kbase_js_set_timeouts - update all JS timeouts with user specified data - * @kbdev: Device pointer - * - * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is - * set to a positive number then that becomes the new value used, if a timeout - * is negative then the default is set. - */ -void kbase_js_set_timeouts(struct kbase_device *kbdev); - /* * Helpers follow */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h index e1342045b394c..04f7809f79d3d 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -348,9 +348,6 @@ struct kbasep_js_device_data { u32 cfs_ctx_runtime_init_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES */ u32 cfs_ctx_runtime_min_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES */ - /**< Value for JS_SOFT_JOB_TIMEOUT */ - atomic_t soft_job_timeout_ms; - /** List of suspended soft jobs */ struct list_head suspended_soft_jobs_list; @@ -405,7 +402,7 @@ struct kbasep_js_kctx_info { * * You may not access any of these members from IRQ context. */ - struct kbase_jsctx { + struct { struct mutex jsctx_mutex; /**< Job Scheduler Context lock */ /** Number of jobs ready to run - does \em not include the jobs waiting in diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c index 90c13458ec7cc..692460710ce07 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -235,11 +235,16 @@ int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context * void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx) { + struct kbasep_js_policy_cfs_ctx *ctx_info; + struct kbasep_js_policy_cfs *policy_info; struct kbase_device *kbdev; KBASE_DEBUG_ASSERT(js_policy != NULL); KBASE_DEBUG_ASSERT(kctx != NULL); + policy_info = &js_policy->cfs; + ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs; + kbdev = container_of(js_policy, struct kbase_device, js_data.policy); KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_TERM_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx)); diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c index c1851caa95a03..ffc12a538af68 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. 
All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -24,21 +24,22 @@ #ifdef CONFIG_DMA_SHARED_BUFFER #include #endif /* CONFIG_DMA_SHARED_BUFFER */ -#ifdef CONFIG_UMP -#include -#endif /* CONFIG_UMP */ + #include #include #include -#include #include #include #include #include #include +#include #include + +#if defined(CONFIG_MALI_MIPE_ENABLED) #include +#endif /** * @brief Check the zone compatibility of two regions. @@ -391,33 +392,13 @@ int kbase_add_va_region(struct kbase_context *kctx, { u64 start_pfn; - /* - * Depending on the zone the allocation request is for - * we might need to retry it. - */ - do { - tmp = kbase_region_tracker_find_region_meeting_reqs( - kctx, reg, nr_pages, align); - if (tmp) { - start_pfn = (tmp->start_pfn + align - 1) & - ~(align - 1); - err = kbase_insert_va_region_nolock(kctx, reg, - tmp, start_pfn, nr_pages); - break; - } - - /* - * If the allocation is not from the same zone as JIT - * then don't retry, we're out of VA and there is - * nothing which can be done about it. - */ - if ((reg->flags & KBASE_REG_ZONE_MASK) != - KBASE_REG_ZONE_CUSTOM_VA) - break; - } while (kbase_jit_evict(kctx)); - - if (!tmp) + tmp = kbase_region_tracker_find_region_meeting_reqs(kctx, reg, nr_pages, align); + if (!tmp) { err = -ENOMEM; + goto exit; + } + start_pfn = (tmp->start_pfn + align - 1) & ~(align - 1); + err = kbase_insert_va_region_nolock(kctx, reg, tmp, start_pfn, nr_pages); } exit: @@ -429,10 +410,7 @@ KBASE_EXPORT_TEST_API(kbase_add_va_region); /** * @brief Initialize the internal region tracker data structure. */ -static void kbase_region_tracker_ds_init(struct kbase_context *kctx, - struct kbase_va_region *same_va_reg, - struct kbase_va_region *exec_reg, - struct kbase_va_region *custom_va_reg) +static void kbase_region_tracker_ds_init(struct kbase_context *kctx, struct kbase_va_region *same_va_reg, struct kbase_va_region *exec_reg, struct kbase_va_region *custom_va_reg) { kctx->reg_rbtree = RB_ROOT; kbase_region_tracker_insert(kctx, same_va_reg); @@ -470,11 +448,6 @@ int kbase_region_tracker_init(struct kbase_context *kctx) size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE; u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT; - u64 same_va_pages; - int err; - - /* Take the lock as kbase_free_alloced_region requires it */ - kbase_gpu_vm_lock(kctx); #if defined(CONFIG_ARM64) same_va_bits = VA_BITS; @@ -491,29 +464,24 @@ int kbase_region_tracker_init(struct kbase_context *kctx) same_va_bits = 33; #endif - if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) { - err = -EINVAL; - goto fail_unlock; - } + if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits) + return -EINVAL; - same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; /* all have SAME_VA */ same_va_reg = kbase_alloc_free_region(kctx, 1, - same_va_pages, + (1ULL << (same_va_bits - PAGE_SHIFT)) - 1, KBASE_REG_ZONE_SAME_VA); - if (!same_va_reg) { - err = -ENOMEM; - goto fail_unlock; - } + if (!same_va_reg) + return -ENOMEM; #ifdef CONFIG_64BIT - /* 32-bit clients have exec and custom VA zones */ + /* only 32-bit clients have the other two zones */ if (kctx->is_compat) { #endif if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { - err = -EINVAL; - goto fail_free_same_va; + kbase_free_alloced_region(same_va_reg); + return -EINVAL; } /* If the current size of TMEM is out of range of the * virtual 
address space addressable by the MMU then @@ -528,8 +496,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx) KBASE_REG_ZONE_EXEC); if (!exec_reg) { - err = -ENOMEM; - goto fail_free_same_va; + kbase_free_alloced_region(same_va_reg); + return -ENOMEM; } custom_va_reg = kbase_alloc_free_region(kctx, @@ -537,8 +505,9 @@ int kbase_region_tracker_init(struct kbase_context *kctx) custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); if (!custom_va_reg) { - err = -ENOMEM; - goto fail_free_exec; + kbase_free_alloced_region(same_va_reg); + kbase_free_alloced_region(exec_reg); + return -ENOMEM; } #ifdef CONFIG_64BIT } @@ -546,108 +515,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx) kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg); - kctx->same_va_end = same_va_pages + 1; - - kbase_gpu_vm_unlock(kctx); - return 0; - -fail_free_exec: - kbase_free_alloced_region(exec_reg); -fail_free_same_va: - kbase_free_alloced_region(same_va_reg); -fail_unlock: - kbase_gpu_vm_unlock(kctx); - return err; -} - -int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages) -{ -#ifdef CONFIG_64BIT - struct kbase_va_region *same_va; - struct kbase_va_region *custom_va_reg; - u64 same_va_bits; - u64 total_va_size; - int err; - - /* - * Nothing to do for 32-bit clients, JIT uses the existing - * custom VA zone. - */ - if (kctx->is_compat) - return 0; - -#if defined(CONFIG_ARM64) - same_va_bits = VA_BITS; -#elif defined(CONFIG_X86_64) - same_va_bits = 47; -#elif defined(CONFIG_64BIT) -#error Unsupported 64-bit architecture -#endif - - if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) - same_va_bits = 33; - - total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; - - kbase_gpu_vm_lock(kctx); - - /* - * Modify the same VA free region after creation. Be careful to ensure - * that allocations haven't been made as they could cause an overlap - * to happen with existing same VA allocations and the custom VA zone. - */ - same_va = kbase_region_tracker_find_region_base_address(kctx, - PAGE_SIZE); - if (!same_va) { - err = -ENOMEM; - goto fail_unlock; - } - - /* The region flag or region size has changed since creation so bail. */ - if ((!(same_va->flags & KBASE_REG_FREE)) || - (same_va->nr_pages != total_va_size)) { - err = -ENOMEM; - goto fail_unlock; - } - - if (same_va->nr_pages < jit_va_pages || - kctx->same_va_end < jit_va_pages) { - err = -ENOMEM; - goto fail_unlock; - } - - /* It's safe to adjust the same VA zone now */ - same_va->nr_pages -= jit_va_pages; - kctx->same_va_end -= jit_va_pages; - - /* - * Create a custom VA zone at the end of the VA for allocations which - * JIT can use so it doesn't have to allocate VA from the kernel. - */ - custom_va_reg = kbase_alloc_free_region(kctx, - kctx->same_va_end, - jit_va_pages, - KBASE_REG_ZONE_CUSTOM_VA); - if (!custom_va_reg) { - /* - * The context will be destroyed if we fail here so no point - * reverting the change we made to same_va. 
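The kbase_region_tracker_init_jit() being removed above carves the JIT zone out of the tail of the SAME_VA zone on 64-bit clients: it shrinks same_va by jit_va_pages and then plants a CUSTOM_VA region starting at the new same_va_end. The page arithmetic is easy to model in isolation; the va_bits value and JIT reservation below are illustrative, not taken from any particular GPU:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

int main(void)
{
    uint64_t same_va_bits = 33;        /* e.g. a 33-bit SAME_VA client */
    uint64_t jit_va_pages = 1u << 16;  /* illustrative JIT reservation */

    /* All of SAME_VA except page 0, as in the region tracker setup. */
    uint64_t same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
    uint64_t same_va_end   = same_va_pages + 1;

    /* Shrink SAME_VA; the JIT CUSTOM_VA zone starts where it now ends. */
    same_va_pages -= jit_va_pages;
    same_va_end   -= jit_va_pages;

    printf("same_va: %llu pages, jit zone: pfn %llu .. %llu\n",
           (unsigned long long)same_va_pages,
           (unsigned long long)same_va_end,
           (unsigned long long)(same_va_end + jit_va_pages - 1));
    return 0;
}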
- */ - err = -ENOMEM; - goto fail_unlock; - } - - kbase_region_tracker_insert(kctx, custom_va_reg); - - kbase_gpu_vm_unlock(kctx); - return 0; - -fail_unlock: - kbase_gpu_vm_unlock(kctx); - return err; -#else return 0; -#endif } int kbase_mem_init(struct kbase_device *kbdev) @@ -745,46 +613,8 @@ KBASE_EXPORT_TEST_API(kbase_alloc_free_region); */ void kbase_free_alloced_region(struct kbase_va_region *reg) { + KBASE_DEBUG_ASSERT(NULL != reg); if (!(reg->flags & KBASE_REG_FREE)) { - /* - * The physical allocation should have been removed from the - * eviction list before this function is called. However, in the - * case of abnormal process termination or the app leaking the - * memory kbase_mem_free_region is not called so it can still be - * on the list at termination time of the region tracker. - */ - if (!list_empty(®->gpu_alloc->evict_node)) { - /* - * Unlink the physical allocation before unmaking it - * evictable so that the allocation isn't grown back to - * its last backed size as we're going to unmap it - * anyway. - */ - reg->cpu_alloc->reg = NULL; - if (reg->cpu_alloc != reg->gpu_alloc) - reg->gpu_alloc->reg = NULL; - - /* - * If a region has been made evictable then we must - * unmake it before trying to free it. - * If the memory hasn't been reclaimed it will be - * unmapped and freed below, if it has been reclaimed - * then the operations below are no-ops. - */ - if (reg->flags & KBASE_REG_DONT_NEED) { - KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == - KBASE_MEM_TYPE_NATIVE); - kbase_mem_evictable_unmake(reg->gpu_alloc); - } - } - - /* - * Remove the region from the sticky resource metadata - * list should it be there. - */ - kbase_sticky_resource_release(reg->kctx, NULL, - reg->start_pfn << PAGE_SHIFT); - kbase_mem_phy_alloc_put(reg->cpu_alloc); kbase_mem_phy_alloc_put(reg->gpu_alloc); /* To detect use-after-free in debug builds */ @@ -795,6 +625,41 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) KBASE_EXPORT_TEST_API(kbase_free_alloced_region); +void kbase_mmu_update(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(NULL != kctx); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + /* ASSERT that the context has a valid as_nr, which is only the case + * when it's scheduled in. + * + * as_nr won't change because the caller has the runpool_irq lock */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex); + + kctx->kbdev->mmu_mode->update(kctx); +} + +KBASE_EXPORT_TEST_API(kbase_mmu_update); + +void kbase_mmu_disable(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(NULL != kctx); + /* ASSERT that the context has a valid as_nr, which is only the case + * when it's scheduled in. 
+ * + * as_nr won't change because the caller has the runpool_irq lock */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + + kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); +} + +KBASE_EXPORT_TEST_API(kbase_mmu_disable); + +void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) +{ + kbdev->mmu_mode->disable_as(kbdev, as_nr); +} + int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align) { int err; @@ -1026,10 +891,10 @@ static int kbase_do_syncset(struct kbase_context *kctx, /* find the region where the virtual address is contained */ reg = kbase_region_tracker_find_region_enclosing_address(kctx, - sset->mem_handle.basep.handle); + sset->mem_handle); if (!reg) { dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX", - sset->mem_handle.basep.handle); + sset->mem_handle); err = -EINVAL; goto out_unlock; } @@ -1043,7 +908,7 @@ static int kbase_do_syncset(struct kbase_context *kctx, map = kbasep_find_enclosing_cpu_mapping_of_region(reg, start, size); if (!map) { dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX", - start, sset->mem_handle.basep.handle); + start, sset->mem_handle); err = -EINVAL; goto out_unlock; } @@ -1124,34 +989,17 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(NULL != reg); lockdep_assert_held(&kctx->reg_lock); - - /* - * Unlink the physical allocation before unmaking it evictable so - * that the allocation isn't grown back to its last backed size - * as we're going to unmap it anyway. - */ - reg->cpu_alloc->reg = NULL; - if (reg->cpu_alloc != reg->gpu_alloc) - reg->gpu_alloc->reg = NULL; - - /* - * If a region has been made evictable then we must unmake it - * before trying to free it. - * If the memory hasn't been reclaimed it will be unmapped and freed - * below, if it has been reclaimed then the operations below are no-ops. 
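The deleted comment just above spells out the ordering rule the newer allocator enforced before freeing a region: if the backing was marked DONT_NEED (evictable), it must be unmade evictable first, so a still-resident allocation gets unmapped and freed normally while an already-reclaimed one turns the later steps into no-ops. A plain flag-check sketch of that rule; the flag bits and helper here are hypothetical:

#include <stdio.h>

#define REG_DONT_NEED  (1u << 0)   /* illustrative flag bits */
#define REG_RECLAIMED  (1u << 1)

struct region { unsigned int flags; };

static void unmake_evictable(struct region *reg)
{
    reg->flags &= ~REG_DONT_NEED;  /* stand-in for kbase_mem_evictable_unmake() */
}

static void free_region(struct region *reg)
{
    /* Ordering rule: clear evictability before tearing the region down. */
    if (reg->flags & REG_DONT_NEED)
        unmake_evictable(reg);

    if (reg->flags & REG_RECLAIMED)
        printf("backing already reclaimed: unmap/free are no-ops\n");
    else
        printf("unmap from GPU and free physical pages\n");
}

int main(void)
{
    struct region evictable = { REG_DONT_NEED };
    struct region reclaimed = { REG_DONT_NEED | REG_RECLAIMED };

    free_region(&evictable);
    free_region(&reclaimed);
    return 0;
}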
- */ - if (reg->flags & KBASE_REG_DONT_NEED) { - KBASE_DEBUG_ASSERT(reg->cpu_alloc->type == - KBASE_MEM_TYPE_NATIVE); - kbase_mem_evictable_unmake(reg->gpu_alloc); - } - err = kbase_gpu_munmap(kctx, reg); if (err) { dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n"); goto out; } - +#ifndef CONFIG_MALI_NO_MALI + if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) { + /* Wait for GPU to flush write buffer before freeing physical pages */ + kbase_wait_write_flush(kctx); + } +#endif /* This will also free the physical pages */ kbase_free_alloced_region(reg); @@ -1198,6 +1046,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) kbase_free_alloced_region(reg); } else { /* A real GPU va */ + /* Validate the region */ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); if (!reg || (reg->flags & KBASE_REG_FREE)) { @@ -1214,6 +1063,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) err = -EINVAL; goto out_unlock; } + err = kbase_mem_free_region(kctx, reg); } @@ -1275,8 +1125,8 @@ int kbase_alloc_phy_pages_helper( size_t nr_pages_requested) { int new_page_count __maybe_unused; - size_t old_page_count = alloc->nents; + KBASE_DEBUG_ASSERT(alloc); KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); KBASE_DEBUG_ASSERT(alloc->imported.kctx); @@ -1292,21 +1142,14 @@ int kbase_alloc_phy_pages_helper( kbase_process_page_usage_inc(alloc->imported.kctx, nr_pages_requested); if (kbase_mem_pool_alloc_pages(&alloc->imported.kctx->mem_pool, - nr_pages_requested, alloc->pages + old_page_count) != 0) + nr_pages_requested, alloc->pages + alloc->nents) != 0) goto no_alloc; - /* - * Request a zone cache update, this scans only the new pages an - * appends their information to the zone cache. if the update - * fails then clear the cache so we fall-back to doing things - * page by page. - */ - if (kbase_zone_cache_update(alloc, old_page_count) != 0) - kbase_zone_cache_clear(alloc); - +#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_aux_pagesalloc( (u32)alloc->imported.kctx->id, (u64)new_page_count); +#endif alloc->nents += nr_pages_requested; done: @@ -1324,12 +1167,11 @@ int kbase_free_phy_pages_helper( struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free) { - struct kbase_context *kctx = alloc->imported.kctx; bool syncback; - bool reclaimed = (alloc->evicted != 0); phys_addr_t *start_free; int new_page_count __maybe_unused; + KBASE_DEBUG_ASSERT(alloc); KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); KBASE_DEBUG_ASSERT(alloc->imported.kctx); KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free); @@ -1342,37 +1184,22 @@ int kbase_free_phy_pages_helper( syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; - /* - * Clear the zone cache, we don't expect JIT allocations to be - * shrunk in parts so there is no point trying to optimize for that - * by scanning for the changes caused by freeing this memory and - * updating the existing cache entries. - */ - kbase_zone_cache_clear(alloc); - - kbase_mem_pool_free_pages(&kctx->mem_pool, + kbase_mem_pool_free_pages(&alloc->imported.kctx->mem_pool, nr_pages_to_free, start_free, - syncback, - reclaimed); + syncback); alloc->nents -= nr_pages_to_free; + kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_to_free); + new_page_count = kbase_atomic_sub_pages( + nr_pages_to_free, &alloc->imported.kctx->used_pages); + kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->kbdev->memdev.used_pages); - /* - * If the allocation was not evicted (i.e. 
evicted == 0) then - * the page accounting needs to be done. - */ - if (!reclaimed) { - kbase_process_page_usage_dec(kctx, nr_pages_to_free); - new_page_count = kbase_atomic_sub_pages(nr_pages_to_free, - &kctx->used_pages); - kbase_atomic_sub_pages(nr_pages_to_free, - &kctx->kbdev->memdev.used_pages); - - kbase_tlstream_aux_pagesalloc( - (u32)kctx->id, - (u64)new_page_count); - } +#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_aux_pagesalloc( + (u32)alloc->imported.kctx->id, + (u64)new_page_count); +#endif return 0; } @@ -1385,12 +1212,7 @@ void kbase_mem_kref_free(struct kref *kref) switch (alloc->type) { case KBASE_MEM_TYPE_NATIVE: { - WARN_ON(!alloc->imported.kctx); - /* - * The physical allocation must have been removed from the - * eviction list before trying to free it. - */ - WARN_ON(!list_empty(&alloc->evict_node)); + KBASE_DEBUG_ASSERT(alloc->imported.kctx); kbase_free_phy_pages_helper(alloc, alloc->nents); break; } @@ -1424,8 +1246,6 @@ void kbase_mem_kref_free(struct kref *kref) break; #endif case KBASE_MEM_TYPE_IMPORTED_USER_BUF: - if (alloc->imported.user_buf.mm) - mmdrop(alloc->imported.user_buf.mm); kfree(alloc->imported.user_buf.pages); break; case KBASE_MEM_TYPE_TB:{ @@ -1470,11 +1290,9 @@ int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0) goto out_term; - reg->cpu_alloc->reg = reg; if (reg->cpu_alloc != reg->gpu_alloc) { if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0) goto out_rollback; - reg->gpu_alloc->reg = reg; } return 0; @@ -1514,10 +1332,6 @@ bool kbase_check_alloc_flags(unsigned long flags) if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) return false; - /* BASE_MEM_IMPORT_SHARED is only valid for imported memory */ - if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED) - return false; - return true; } @@ -1572,923 +1386,3 @@ void kbase_gpu_vm_unlock(struct kbase_context *kctx) } KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock); - -#ifdef CONFIG_DEBUG_FS -struct kbase_jit_debugfs_data { - int (*func)(struct kbase_jit_debugfs_data *); - struct mutex lock; - struct kbase_context *kctx; - u64 active_value; - u64 pool_value; - u64 destroy_value; - char buffer[50]; -}; - -static int kbase_jit_debugfs_common_open(struct inode *inode, - struct file *file, int (*func)(struct kbase_jit_debugfs_data *)) -{ - struct kbase_jit_debugfs_data *data; - - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return -ENOMEM; - - data->func = func; - mutex_init(&data->lock); - data->kctx = (struct kbase_context *) inode->i_private; - - file->private_data = data; - - return nonseekable_open(inode, file); -} - -static ssize_t kbase_jit_debugfs_common_read(struct file *file, - char __user *buf, size_t len, loff_t *ppos) -{ - struct kbase_jit_debugfs_data *data; - size_t size; - int ret; - - data = (struct kbase_jit_debugfs_data *) file->private_data; - mutex_lock(&data->lock); - - if (*ppos) { - size = strnlen(data->buffer, sizeof(data->buffer)); - } else { - if (!data->func) { - ret = -EACCES; - goto out_unlock; - } - - if (data->func(data)) { - ret = -EACCES; - goto out_unlock; - } - - size = scnprintf(data->buffer, sizeof(data->buffer), - "%llu,%llu,%llu", data->active_value, - data->pool_value, data->destroy_value); - } - - ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size); - -out_unlock: - mutex_unlock(&data->lock); - return ret; -} - -static int kbase_jit_debugfs_common_release(struct inode *inode, - struct file *file) -{ - 
kfree(file->private_data); - return 0; -} - -#define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \ -static int __fops ## _open(struct inode *inode, struct file *file) \ -{ \ - return kbase_jit_debugfs_common_open(inode, file, __func); \ -} \ -static const struct file_operations __fops = { \ - .owner = THIS_MODULE, \ - .open = __fops ## _open, \ - .release = kbase_jit_debugfs_common_release, \ - .read = kbase_jit_debugfs_common_read, \ - .write = NULL, \ - .llseek = generic_file_llseek, \ -} - -static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data) -{ - struct kbase_context *kctx = data->kctx; - struct list_head *tmp; - - mutex_lock(&kctx->jit_lock); - list_for_each(tmp, &kctx->jit_active_head) { - data->active_value++; - } - - list_for_each(tmp, &kctx->jit_pool_head) { - data->pool_value++; - } - - list_for_each(tmp, &kctx->jit_destroy_head) { - data->destroy_value++; - } - mutex_unlock(&kctx->jit_lock); - - return 0; -} -KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops, - kbase_jit_debugfs_count_get); - -static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data) -{ - struct kbase_context *kctx = data->kctx; - struct kbase_va_region *reg; - - mutex_lock(&kctx->jit_lock); - list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { - data->active_value += reg->nr_pages; - } - - list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { - data->pool_value += reg->nr_pages; - } - - list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { - data->destroy_value += reg->nr_pages; - } - mutex_unlock(&kctx->jit_lock); - - return 0; -} -KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops, - kbase_jit_debugfs_vm_get); - -static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) -{ - struct kbase_context *kctx = data->kctx; - struct kbase_va_region *reg; - - mutex_lock(&kctx->jit_lock); - list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { - data->active_value += reg->gpu_alloc->nents; - } - - list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) { - data->pool_value += reg->gpu_alloc->nents; - } - - list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) { - data->destroy_value += reg->gpu_alloc->nents; - } - mutex_unlock(&kctx->jit_lock); - - return 0; -} -KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, - kbase_jit_debugfs_phys_get); - -void kbase_jit_debugfs_add(struct kbase_context *kctx) -{ - /* Debugfs entry for getting the number of JIT allocations. */ - debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry, - kctx, &kbase_jit_debugfs_count_fops); - - /* - * Debugfs entry for getting the total number of virtual pages - * used by JIT allocations. - */ - debugfs_create_file("mem_jit_vm", S_IRUGO, kctx->kctx_dentry, - kctx, &kbase_jit_debugfs_vm_fops); - - /* - * Debugfs entry for getting the number of physical pages used - * by JIT allocations. - */ - debugfs_create_file("mem_jit_phys", S_IRUGO, kctx->kctx_dentry, - kctx, &kbase_jit_debugfs_phys_fops); -} -#endif /* CONFIG_DEBUG_FS */ - -/** - * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations - * @work: Work item - * - * This function does the work of freeing JIT allocations whose physical - * backing has been released. 
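kbase_jit_destroy_worker(), whose kernel-doc ends just above and whose body follows, drains jit_destroy_head in the usual deferred-free shape: take the lock, detach one entry (or see that the list is empty), drop the lock, do the expensive free, and loop. A userspace model of that drain loop, with a pthread mutex standing in for jit_lock and a hypothetical singly linked node type for brevity:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct region { struct region *next; int id; };

static pthread_mutex_t jit_lock = PTHREAD_MUTEX_INITIALIZER;
static struct region *destroy_head;

static void destroy_worker(void)
{
    struct region *reg;

    do {
        pthread_mutex_lock(&jit_lock);
        reg = destroy_head;                  /* pop one entry, if any */
        if (reg)
            destroy_head = reg->next;
        pthread_mutex_unlock(&jit_lock);

        if (reg) {
            /* Expensive teardown happens outside the lock. */
            printf("freeing region %d\n", reg->id);
            free(reg);
        }
    } while (reg);
}

int main(void)
{
    int i;

    for (i = 0; i < 3; i++) {
        struct region *reg = malloc(sizeof(*reg));

        if (!reg)
            break;
        reg->id = i;
        pthread_mutex_lock(&jit_lock);
        reg->next = destroy_head;
        destroy_head = reg;
        pthread_mutex_unlock(&jit_lock);
    }
    destroy_worker();
    return 0;
}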
- */ -static void kbase_jit_destroy_worker(struct work_struct *work) -{ - struct kbase_context *kctx; - struct kbase_va_region *reg; - - kctx = container_of(work, struct kbase_context, jit_work); - do { - mutex_lock(&kctx->jit_lock); - if (list_empty(&kctx->jit_destroy_head)) - reg = NULL; - else - reg = list_first_entry(&kctx->jit_destroy_head, - struct kbase_va_region, jit_node); - - if (reg) { - list_del(&reg->jit_node); - mutex_unlock(&kctx->jit_lock); - - kbase_gpu_vm_lock(kctx); - kbase_mem_free_region(kctx, reg); - kbase_gpu_vm_unlock(kctx); - } else - mutex_unlock(&kctx->jit_lock); - } while (reg); -} - -int kbase_jit_init(struct kbase_context *kctx) -{ - INIT_LIST_HEAD(&kctx->jit_active_head); - INIT_LIST_HEAD(&kctx->jit_pool_head); - INIT_LIST_HEAD(&kctx->jit_destroy_head); - mutex_init(&kctx->jit_lock); - INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker); - - return 0; -} - -struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, - struct base_jit_alloc_info *info) -{ - struct kbase_va_region *reg = NULL; - struct kbase_va_region *walker; - struct kbase_va_region *temp; - size_t current_diff = SIZE_MAX; - - int ret; - - mutex_lock(&kctx->jit_lock); - /* - * Scan the pool for an existing allocation which meets our - * requirements and remove it. - */ - list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head, jit_node) { - - if (walker->nr_pages >= info->va_pages) { - size_t min_size, max_size, diff; - - /* - * The JIT allocations VA requirements have been - * meet, it's suitable but other allocations - * might be a better fit. - */ - min_size = min_t(size_t, walker->gpu_alloc->nents, - info->commit_pages); - max_size = max_t(size_t, walker->gpu_alloc->nents, - info->commit_pages); - diff = max_size - min_size; - - if (current_diff > diff) { - current_diff = diff; - reg = walker; - } - - /* The allocation is an exact match, stop looking */ - if (current_diff == 0) - break; - } - } - - if (reg) { - /* - * Remove the found region from the pool and add it to the - * active list. - */ - list_del_init(&reg->jit_node); - list_add(&reg->jit_node, &kctx->jit_active_head); - - /* Release the jit lock before modifying the allocation */ - mutex_unlock(&kctx->jit_lock); - - kbase_gpu_vm_lock(kctx); - - /* Make the physical backing no longer reclaimable */ - if (!kbase_mem_evictable_unmake(reg->gpu_alloc)) - goto update_failed; - - /* Grow the backing if required */ - if (reg->gpu_alloc->nents < info->commit_pages) { - size_t delta; - size_t old_size = reg->gpu_alloc->nents; - - /* Allocate some more pages */ - delta = info->commit_pages - reg->gpu_alloc->nents; - if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, delta) - != 0) - goto update_failed; - - if (reg->cpu_alloc != reg->gpu_alloc) { - if (kbase_alloc_phy_pages_helper( - reg->cpu_alloc, delta) != 0) { - kbase_free_phy_pages_helper( - reg->gpu_alloc, delta); - goto update_failed; - } - } - - ret = kbase_mem_grow_gpu_mapping(kctx, reg, - info->commit_pages, old_size); - /* - * The grow failed so put the allocation back in the - * pool and return failure. 
- */ - if (ret) - goto update_failed; - } - kbase_gpu_vm_unlock(kctx); - } else { - /* No suitable JIT allocation was found so create a new one */ - u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | - BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF | - BASE_MEM_COHERENT_LOCAL; - u64 gpu_addr; - u16 alignment; - - mutex_unlock(&kctx->jit_lock); - - reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, - info->extent, &flags, &gpu_addr, &alignment); - if (!reg) - goto out_unlocked; - - mutex_lock(&kctx->jit_lock); - list_add(&reg->jit_node, &kctx->jit_active_head); - mutex_unlock(&kctx->jit_lock); - } - - return reg; - -update_failed: - /* - * An update to an allocation from the pool failed, chances - * are slim a new allocation would fair any better so return - * the allocation to the pool and return the function with failure. - */ - kbase_gpu_vm_unlock(kctx); - mutex_lock(&kctx->jit_lock); - list_del_init(&reg->jit_node); - list_add(&reg->jit_node, &kctx->jit_pool_head); - mutex_unlock(&kctx->jit_lock); -out_unlocked: - return NULL; -} - -void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) -{ - /* The physical backing of memory in the pool is always reclaimable */ - down_read(&kctx->process_mm->mmap_sem); - kbase_gpu_vm_lock(kctx); - kbase_mem_evictable_make(reg->gpu_alloc); - kbase_gpu_vm_unlock(kctx); - up_read(&kctx->process_mm->mmap_sem); - - mutex_lock(&kctx->jit_lock); - list_del_init(&reg->jit_node); - list_add(&reg->jit_node, &kctx->jit_pool_head); - mutex_unlock(&kctx->jit_lock); -} - -void kbase_jit_backing_lost(struct kbase_va_region *reg) -{ - struct kbase_context *kctx = reg->kctx; - - /* - * JIT allocations will always be on a list, if the region - * is not on a list then it's not a JIT allocation. - */ - if (list_empty(&reg->jit_node)) - return; - - /* - * Freeing the allocation requires locks we might not be able - * to take now, so move the allocation to the free list and kick - * the worker which will do the freeing. - */ - mutex_lock(&kctx->jit_lock); - list_del_init(&reg->jit_node); - list_add(&reg->jit_node, &kctx->jit_destroy_head); - mutex_unlock(&kctx->jit_lock); - - schedule_work(&kctx->jit_work); -} - -bool kbase_jit_evict(struct kbase_context *kctx) -{ - struct kbase_va_region *reg = NULL; - - lockdep_assert_held(&kctx->reg_lock); - - /* Free the oldest allocation from the pool */ - mutex_lock(&kctx->jit_lock); - if (!list_empty(&kctx->jit_pool_head)) { - reg = list_entry(kctx->jit_pool_head.prev, - struct kbase_va_region, jit_node); - list_del(&reg->jit_node); - } - mutex_unlock(&kctx->jit_lock); - - if (reg) - kbase_mem_free_region(kctx, reg); - - return (reg != NULL); -} - -void kbase_jit_term(struct kbase_context *kctx) -{ - struct kbase_va_region *walker; - - /* Free all allocations for this context */ - - /* - * Flush the freeing of allocations whose backing has been freed - * (i.e. everything in jit_destroy_head). 
- */ - cancel_work_sync(&kctx->jit_work); - - kbase_gpu_vm_lock(kctx); - /* Free all allocations from the pool */ - while (!list_empty(&kctx->jit_pool_head)) { - walker = list_first_entry(&kctx->jit_pool_head, - struct kbase_va_region, jit_node); - list_del(&walker->jit_node); - kbase_mem_free_region(kctx, walker); - } - - /* Free all allocations from active list */ - while (!list_empty(&kctx->jit_active_head)) { - walker = list_first_entry(&kctx->jit_active_head, - struct kbase_va_region, jit_node); - list_del(&walker->jit_node); - kbase_mem_free_region(kctx, walker); - } - kbase_gpu_vm_unlock(kctx); -} - -static int kbase_jd_user_buf_map(struct kbase_context *kctx, - struct kbase_va_region *reg) -{ - long pinned_pages; - struct kbase_mem_phy_alloc *alloc; - struct page **pages; - phys_addr_t *pa; - long i; - int err = -ENOMEM; - unsigned long address; - struct mm_struct *mm; - struct device *dev; - unsigned long offset; - unsigned long local_size; - - alloc = reg->gpu_alloc; - pa = kbase_get_gpu_phy_pages(reg); - address = alloc->imported.user_buf.address; - mm = alloc->imported.user_buf.mm; - - KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); - - pages = alloc->imported.user_buf.pages; - -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) - pinned_pages = get_user_pages(NULL, mm, - address, - alloc->imported.user_buf.nr_pages, - reg->flags & KBASE_REG_GPU_WR, - 0, pages, NULL); -#else - pinned_pages = get_user_pages_remote(NULL, mm, - address, - alloc->imported.user_buf.nr_pages, - reg->flags & KBASE_REG_GPU_WR, - 0, pages, NULL); -#endif - - if (pinned_pages <= 0) - return pinned_pages; - - if (pinned_pages != alloc->imported.user_buf.nr_pages) { - for (i = 0; i < pinned_pages; i++) - put_page(pages[i]); - return -ENOMEM; - } - - dev = kctx->kbdev->dev; - offset = address & ~PAGE_MASK; - local_size = alloc->imported.user_buf.size; - - for (i = 0; i < pinned_pages; i++) { - dma_addr_t dma_addr; - unsigned long min; - - min = MIN(PAGE_SIZE - offset, local_size); - dma_addr = dma_map_page(dev, pages[i], - offset, min, - DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, dma_addr)) - goto unwind; - - alloc->imported.user_buf.dma_addrs[i] = dma_addr; - pa[i] = page_to_phys(pages[i]); - - local_size -= min; - offset = 0; - } - - alloc->nents = pinned_pages; - - err = kbase_mmu_insert_pages(kctx, reg->start_pfn, pa, - kbase_reg_current_backed_size(reg), - reg->flags); - if (err == 0) - return 0; - - alloc->nents = 0; - /* fall down */ -unwind: - while (i--) { - dma_unmap_page(kctx->kbdev->dev, - alloc->imported.user_buf.dma_addrs[i], - PAGE_SIZE, DMA_BIDIRECTIONAL); - put_page(pages[i]); - pages[i] = NULL; - } - - return err; -} - -static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc, bool writeable) -{ - long i; - struct page **pages; - unsigned long size = alloc->imported.user_buf.size; - - KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF); - pages = alloc->imported.user_buf.pages; - for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { - unsigned long local_size; - dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; - - local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK)); - dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size, - DMA_BIDIRECTIONAL); - if (writeable) - set_page_dirty_lock(pages[i]); - put_page(pages[i]); - pages[i] = NULL; - - size -= local_size; - } - alloc->nents = 0; -} - - -/* to replace sg_dma_len. 
*/ -#define MALI_SG_DMA_LEN(sg) ((sg)->length) - -#ifdef CONFIG_DMA_SHARED_BUFFER -static int kbase_jd_umm_map(struct kbase_context *kctx, - struct kbase_va_region *reg) -{ - struct sg_table *sgt; - struct scatterlist *s; - int i; - phys_addr_t *pa; - int err; - size_t count = 0; - struct kbase_mem_phy_alloc *alloc; - - alloc = reg->gpu_alloc; - - KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM); - KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt); - sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, - DMA_BIDIRECTIONAL); - - if (IS_ERR_OR_NULL(sgt)) - return -EINVAL; - - /* save for later */ - alloc->imported.umm.sgt = sgt; - - pa = kbase_get_gpu_phy_pages(reg); - KBASE_DEBUG_ASSERT(pa); - - for_each_sg(sgt->sgl, s, sgt->nents, i) { - int j; - size_t pages = PFN_UP(MALI_SG_DMA_LEN(s)); - - WARN_ONCE(MALI_SG_DMA_LEN(s) & (PAGE_SIZE-1), - "MALI_SG_DMA_LEN(s)=%u is not a multiple of PAGE_SIZE\n", - MALI_SG_DMA_LEN(s)); - - WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1), - "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n", - (unsigned long long) sg_dma_address(s)); - - for (j = 0; (j < pages) && (count < reg->nr_pages); j++, - count++) - *pa++ = sg_dma_address(s) + (j << PAGE_SHIFT); - WARN_ONCE(j < pages, - "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n", - alloc->imported.umm.dma_buf->size); - } - - if (WARN_ONCE(count < reg->nr_pages, - "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n", - alloc->imported.umm.dma_buf->size)) { - err = -EINVAL; - goto out; - } - - /* Update nents as we now have pages to map */ - alloc->nents = count; - - err = kbase_mmu_insert_pages(kctx, reg->start_pfn, - kbase_get_gpu_phy_pages(reg), - kbase_reg_current_backed_size(reg), - reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD); - -out: - if (err) { - dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, - alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); - alloc->imported.umm.sgt = NULL; - } - - return err; -} - -static void kbase_jd_umm_unmap(struct kbase_context *kctx, - struct kbase_mem_phy_alloc *alloc) -{ - KBASE_DEBUG_ASSERT(kctx); - KBASE_DEBUG_ASSERT(alloc); - KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment); - KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt); - dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, - alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); - alloc->imported.umm.sgt = NULL; - alloc->nents = 0; -} -#endif /* CONFIG_DMA_SHARED_BUFFER */ - -#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) \ - || defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS) -static void add_kds_resource(struct kds_resource *kds_res, - struct kds_resource **kds_resources, u32 *kds_res_count, - unsigned long *kds_access_bitmap, bool exclusive) -{ - u32 i; - - for (i = 0; i < *kds_res_count; i++) { - /* Duplicate resource, ignore */ - if (kds_resources[i] == kds_res) - return; - } - - kds_resources[*kds_res_count] = kds_res; - if (exclusive) - set_bit(*kds_res_count, kds_access_bitmap); - (*kds_res_count)++; -} -#endif - -struct kbase_mem_phy_alloc *kbase_map_external_resource( - struct kbase_context *kctx, struct kbase_va_region *reg, - struct mm_struct *locked_mm -#ifdef CONFIG_KDS - , u32 *kds_res_count, struct kds_resource **kds_resources, - unsigned long *kds_access_bitmap, bool exclusive -#endif - ) -{ - int err; - - /* decide what needs to happen for this resource */ - switch (reg->gpu_alloc->type) { - case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { - if (reg->gpu_alloc->imported.user_buf.mm != locked_mm) - goto exit; - - 
reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; - if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) { - err = kbase_jd_user_buf_map(kctx, reg); - if (err) { - reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; - goto exit; - } - } - } - break; - case KBASE_MEM_TYPE_IMPORTED_UMP: { -#if defined(CONFIG_KDS) && defined(CONFIG_UMP) - if (kds_res_count) { - struct kds_resource *kds_res; - - kds_res = ump_dd_kds_resource_get( - reg->gpu_alloc->imported.ump_handle); - if (kds_res) - add_kds_resource(kds_res, kds_resources, - kds_res_count, - kds_access_bitmap, exclusive); - } -#endif /*defined(CONFIG_KDS) && defined(CONFIG_UMP) */ - break; - } -#ifdef CONFIG_DMA_SHARED_BUFFER - case KBASE_MEM_TYPE_IMPORTED_UMM: { -#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS - if (kds_res_count) { - struct kds_resource *kds_res; - - kds_res = get_dma_buf_kds_resource( - reg->gpu_alloc->imported.umm.dma_buf); - if (kds_res) - add_kds_resource(kds_res, kds_resources, - kds_res_count, - kds_access_bitmap, exclusive); - } -#endif - reg->gpu_alloc->imported.umm.current_mapping_usage_count++; - if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) { - err = kbase_jd_umm_map(kctx, reg); - if (err) { - reg->gpu_alloc->imported.umm.current_mapping_usage_count--; - goto exit; - } - } - break; - } -#endif - default: - goto exit; - } - - return kbase_mem_phy_alloc_get(reg->gpu_alloc); -exit: - return NULL; -} - -void kbase_unmap_external_resource(struct kbase_context *kctx, - struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc) -{ - switch (alloc->type) { -#ifdef CONFIG_DMA_SHARED_BUFFER - case KBASE_MEM_TYPE_IMPORTED_UMM: { - alloc->imported.umm.current_mapping_usage_count--; - - if (0 == alloc->imported.umm.current_mapping_usage_count) { - if (reg && reg->gpu_alloc == alloc) - kbase_mmu_teardown_pages( - kctx, - reg->start_pfn, - kbase_reg_current_backed_size(reg)); - - kbase_jd_umm_unmap(kctx, alloc); - } - } - break; -#endif /* CONFIG_DMA_SHARED_BUFFER */ - case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { - alloc->imported.user_buf.current_mapping_usage_count--; - - if (0 == alloc->imported.user_buf.current_mapping_usage_count) { - bool writeable = true; - - if (reg && reg->gpu_alloc == alloc) - kbase_mmu_teardown_pages( - kctx, - reg->start_pfn, - kbase_reg_current_backed_size(reg)); - - if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0)) - writeable = false; - - kbase_jd_user_buf_unmap(kctx, alloc, writeable); - } - } - break; - default: - break; - } - kbase_mem_phy_alloc_put(alloc); -} - -struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( - struct kbase_context *kctx, u64 gpu_addr) -{ - struct kbase_ctx_ext_res_meta *meta = NULL; - struct kbase_ctx_ext_res_meta *walker; - - lockdep_assert_held(&kctx->reg_lock); - - /* - * Walk the per context external resource metadata list for the - * metadata which matches the region which is being acquired. - */ - list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) { - if (walker->gpu_addr == gpu_addr) { - meta = walker; - break; - } - } - - /* No metadata exists so create one. 
*/ - if (!meta) { - struct kbase_va_region *reg; - - /* Find the region */ - reg = kbase_region_tracker_find_region_enclosing_address( - kctx, gpu_addr); - if (NULL == reg || (reg->flags & KBASE_REG_FREE)) - goto failed; - - /* Allocate the metadata object */ - meta = kzalloc(sizeof(*meta), GFP_KERNEL); - if (!meta) - goto failed; - - /* - * Fill in the metadata object and acquire a reference - * for the physical resource. - */ - meta->alloc = kbase_map_external_resource(kctx, reg, NULL -#ifdef CONFIG_KDS - , NULL, NULL, - NULL, false -#endif - ); - - if (!meta->alloc) - goto fail_map; - - meta->gpu_addr = reg->start_pfn << PAGE_SHIFT; - - list_add(&meta->ext_res_node, &kctx->ext_res_meta_head); - } - - return meta; - -fail_map: - kfree(meta); -failed: - return NULL; -} - -bool kbase_sticky_resource_release(struct kbase_context *kctx, - struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr) -{ - struct kbase_ctx_ext_res_meta *walker; - struct kbase_va_region *reg; - - lockdep_assert_held(&kctx->reg_lock); - - /* Search of the metadata if one isn't provided. */ - if (!meta) { - /* - * Walk the per context external resource metadata list for the - * metadata which matches the region which is being released. - */ - list_for_each_entry(walker, &kctx->ext_res_meta_head, - ext_res_node) { - if (walker->gpu_addr == gpu_addr) { - meta = walker; - break; - } - } - } - - /* No metadata so just return. */ - if (!meta) - return false; - - /* Drop the physical memory reference and free the metadata. */ - reg = kbase_region_tracker_find_region_enclosing_address( - kctx, - meta->gpu_addr); - - kbase_unmap_external_resource(kctx, reg, meta->alloc); - list_del(&meta->ext_res_node); - kfree(meta); - - return true; -} - -int kbase_sticky_resource_init(struct kbase_context *kctx) -{ - INIT_LIST_HEAD(&kctx->ext_res_meta_head); - - return 0; -} - -void kbase_sticky_resource_term(struct kbase_context *kctx) -{ - struct kbase_ctx_ext_res_meta *walker; - - lockdep_assert_held(&kctx->reg_lock); - - /* - * Free any sticky resources which haven't been unmapped. - * - * Note: - * We don't care about refcounts at this point as no future - * references to the meta data will be made. - * Region termination would find these if we didn't free them - * here, but it's more efficient if we do the clean up here. - */ - while (!list_empty(&kctx->ext_res_meta_head)) { - walker = list_first_entry(&kctx->ext_res_meta_head, - struct kbase_ctx_ext_res_meta, ext_res_node); - - kbase_sticky_resource_release(kctx, walker, 0); - } -} diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h index 7b2433e868bdd..7372e1088bd40 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,9 +30,7 @@ #endif #include -#ifdef CONFIG_KDS -#include -#endif /* CONFIG_KDS */ + #ifdef CONFIG_UMP #include #endif /* CONFIG_UMP */ @@ -43,8 +41,6 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #include "mali_kbase_gator.h" #endif -/* Required for kbase_mem_evictable_unmake */ -#include "mali_kbase_mem_linux.h" /* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */ #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */ @@ -116,23 +112,11 @@ struct kbase_mem_phy_alloc { /* kbase_cpu_mappings */ struct list_head mappings; - /* Node used to store this allocation on the eviction list */ - struct list_head evict_node; - /* Physical backing size when the pages where evicted */ - size_t evicted; - /* - * Back reference to the region structure which created this - * allocation, or NULL if it has been freed. - */ - struct kbase_va_region *reg; - /* type of buffer */ enum kbase_memory_type type; unsigned long properties; - struct list_head zone_cache; - /* member in union valid based on @a type */ union { #ifdef CONFIG_UMP @@ -159,7 +143,7 @@ struct kbase_mem_phy_alloc { unsigned long nr_pages; struct page **pages; unsigned int current_mapping_usage_count; - struct mm_struct *mm; + struct task_struct *owner; dma_addr_t *dma_addrs; } user_buf; } imported; @@ -258,8 +242,6 @@ struct kbase_va_region { #define KBASE_REG_SECURE (1ul << 19) -#define KBASE_REG_DONT_NEED (1ul << 20) - #define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) /* only used with 32-bit clients */ @@ -294,8 +276,6 @@ struct kbase_va_region { /* non-NULL if this memory object is a kds_resource */ struct kds_resource *kds_res; - /* List head used to store the region in the JIT allocation pool */ - struct list_head jit_node; }; /* Common functions */ @@ -375,7 +355,6 @@ static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, en alloc->pages = (void *)(alloc + 1); INIT_LIST_HEAD(&alloc->mappings); alloc->type = type; - INIT_LIST_HEAD(&alloc->zone_cache); if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) alloc->imported.user_buf.dma_addrs = @@ -399,17 +378,14 @@ static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, else if (!reg->cpu_alloc) return -ENOMEM; reg->cpu_alloc->imported.kctx = kctx; - INIT_LIST_HEAD(&reg->cpu_alloc->evict_node); if (kctx->infinite_cache_active && (reg->flags & KBASE_REG_CPU_CACHED)) { reg->gpu_alloc = kbase_alloc_create(reg->nr_pages, KBASE_MEM_TYPE_NATIVE); reg->gpu_alloc->imported.kctx = kctx; - INIT_LIST_HEAD(&reg->gpu_alloc->evict_node); } else { reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); } - INIT_LIST_HEAD(&reg->jit_node); reg->flags &= ~KBASE_REG_FREE; return 0; } @@ -529,13 +505,11 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages, * @pages: Pointer to array holding the physical addresses of the pages to * free. * @dirty: Whether any pages may be dirty in the cache. - * @reclaimed: Whether the pages where reclaimable and thus should bypass - * the pool and go straight to the kernel. * * Like kbase_mem_pool_free() but optimized for freeing many pages. 
*/ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, - phys_addr_t *pages, bool dirty, bool reclaimed); + phys_addr_t *pages, bool dirty); /** * kbase_mem_pool_size - Get number of free pages in memory pool @@ -587,7 +561,6 @@ size_t kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size); int kbase_region_tracker_init(struct kbase_context *kctx); -int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages); void kbase_region_tracker_term(struct kbase_context *kctx); struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr); @@ -618,9 +591,6 @@ void kbase_mmu_term(struct kbase_context *kctx); phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx); void kbase_mmu_free_pgd(struct kbase_context *kctx); -int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn, - phys_addr_t *phys, size_t nr, - unsigned long flags); int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags); @@ -653,12 +623,6 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg); void kbase_mmu_update(struct kbase_context *kctx); /** - * kbase_mmu_disable() - Disable the MMU for a previously active kbase context. - * @kctx: Kbase context - * - * Disable and perform the required cache maintenance to remove the all - * data from provided kbase context from the GPU caches. - * * The caller has the following locking conditions: * - It must hold kbase_as::transaction_mutex on kctx's address space * - It must hold the kbasep_js_device_data::runpool_irq::lock @@ -666,13 +630,11 @@ void kbase_mmu_update(struct kbase_context *kctx); void kbase_mmu_disable(struct kbase_context *kctx); /** - * kbase_mmu_disable_as() - Set the MMU to unmapped mode for the specified - * address space. - * @kbdev: Kbase device - * @as_nr: The address space number to set to unmapped. + * kbase_mmu_disable_as() - set the MMU in unmapped mode for an address space. * - * This function must only be called during reset/power-up and it used to - * ensure the registers are in a known state. + * @kbdev: Kbase device + * @as_nr: Number of the address space for which the MMU + * should be set in unmapped mode. * * The caller must hold kbdev->as[as_nr].transaction_mutex. */ @@ -892,166 +854,4 @@ void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size, enum dma_data_direction dir); -#ifdef CONFIG_DEBUG_FS -/** - * kbase_jit_debugfs_add - Add per context debugfs entry for JIT. - * @kctx: kbase context - */ -void kbase_jit_debugfs_add(struct kbase_context *kctx); -#endif /* CONFIG_DEBUG_FS */ - -/** - * kbase_jit_init - Initialize the JIT memory pool management - * @kctx: kbase context - * - * Returns zero on success or negative error number on failure. - */ -int kbase_jit_init(struct kbase_context *kctx); - -/** - * kbase_jit_allocate - Allocate JIT memory - * @kctx: kbase context - * @info: JIT allocation information - * - * Return: JIT allocation on success or NULL on failure. - */ -struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, - struct base_jit_alloc_info *info); - -/** - * kbase_jit_free - Free a JIT allocation - * @kctx: kbase context - * @reg: JIT allocation - * - * Frees a JIT allocation and places it into the free pool for later reuse. 
- */ -void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg); - -/** - * kbase_jit_backing_lost - Inform JIT that an allocation has lost backing - * @reg: JIT allocation - */ -void kbase_jit_backing_lost(struct kbase_va_region *reg); - -/** - * kbase_jit_evict - Evict a JIT allocation from the pool - * @kctx: kbase context - * - * Evict the least recently used JIT allocation from the pool. This can be - * required if normal VA allocations are failing due to VA exhaustion. - * - * Return: True if a JIT allocation was freed, false otherwise. - */ -bool kbase_jit_evict(struct kbase_context *kctx); - -/** - * kbase_jit_term - Terminate the JIT memory pool management - * @kctx: kbase context - */ -void kbase_jit_term(struct kbase_context *kctx); - -/** - * kbase_map_external_resource - Map an external resource to the GPU. - * @kctx: kbase context. - * @reg: The region to map. - * @locked_mm: The mm_struct which has been locked for this operation. - * @kds_res_count: The number of KDS resources. - * @kds_resources: Array of KDS resources. - * @kds_access_bitmap: Access bitmap for KDS. - * @exclusive: If the KDS resource requires exclusive access. - * - * Return: The physical allocation which backs the region on success or NULL - * on failure. - */ -struct kbase_mem_phy_alloc *kbase_map_external_resource( - struct kbase_context *kctx, struct kbase_va_region *reg, - struct mm_struct *locked_mm -#ifdef CONFIG_KDS - , u32 *kds_res_count, struct kds_resource **kds_resources, - unsigned long *kds_access_bitmap, bool exclusive -#endif - ); - -/** - * kbase_unmap_external_resource - Unmap an external resource from the GPU. - * @kctx: kbase context. - * @reg: The region to unmap or NULL if it has already been released. - * @alloc: The physical allocation being unmapped. - */ -void kbase_unmap_external_resource(struct kbase_context *kctx, - struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc); - -/** - * kbase_sticky_resource_init - Initialize sticky resource management. - * @kctx: kbase context - * - * Returns zero on success or negative error number on failure. - */ -int kbase_sticky_resource_init(struct kbase_context *kctx); - -/** - * kbase_sticky_resource_acquire - Acquire a reference on a sticky resource. - * @kctx: kbase context. - * @gpu_addr: The GPU address of the external resource. - * - * Return: The metadata object which represents the binding between the - * external resource and the kbase context on success or NULL on failure. - */ -struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire( - struct kbase_context *kctx, u64 gpu_addr); - -/** - * kbase_sticky_resource_release - Release a reference on a sticky resource. - * @kctx: kbase context. - * @meta: Binding metadata. - * @gpu_addr: GPU address of the external resource. - * - * If meta is NULL then gpu_addr will be used to scan the metadata list and - * find the matching metadata (if any), otherwise the provided meta will be - * used and gpu_addr will be ignored. - * - * Return: True if the release found the metadata and the reference was dropped. - */ -bool kbase_sticky_resource_release(struct kbase_context *kctx, - struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr); - -/** - * kbase_sticky_resource_term - Terminate sticky resource management. - * @kctx: kbase context - */ -void kbase_sticky_resource_term(struct kbase_context *kctx); - -/** - * kbase_zone_cache_update - Update the memory zone cache after new pages have - * been added. - * @alloc: The physical memory allocation to build the cache for. 
- * @start_offset: Offset to where the new pages start. - * - * Updates an existing memory zone cache, updating the counters for the - * various zones. - * If the memory allocation doesn't already have a zone cache assume that - * one isn't created and thus don't do anything. - * - * Return: Zero cache was updated, negative error code on error. - */ -int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc, - size_t start_offset); - -/** - * kbase_zone_cache_build - Build the memory zone cache. - * @alloc: The physical memory allocation to build the cache for. - * - * Create a new zone cache for the provided physical memory allocation if - * one doesn't already exist, if one does exist then just return. - * - * Return: Zero if the zone cache was created, negative error code on error. - */ -int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc); - -/** - * kbase_zone_cache_clear - Clear the memory zone cache. - * @alloc: The physical memory allocation to clear the cache on. - */ -void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc); - #endif /* _KBASE_MEM_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c index f91d3c916355e..b359f4d941486 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,52 +36,15 @@ #ifdef CONFIG_DMA_SHARED_BUFFER #include #endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ -#include #include #include #include #include -#include static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma); static const struct vm_operations_struct kbase_vm_ops; -/** - * kbase_mem_shrink_cpu_mapping - Shrink the CPU mapping(s) of an allocation - * @kctx: Context the region belongs to - * @reg: The GPU region - * @new_pages: The number of pages after the shrink - * @old_pages: The number of pages before the shrink - * - * Return: 0 on success, -errno on error. - * - * Shrink (or completely remove) all CPU mappings which reference the shrunk - * part of the allocation. - * - * Note: Caller must be holding the processes mmap_sem lock. - */ -static int kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages); - -/** - * kbase_mem_shrink_gpu_mapping - Shrink the GPU mapping of an allocation - * @kctx: Context the region belongs to - * @reg: The GPU region or NULL if there isn't one - * @new_pages: The number of pages after the shrink - * @old_pages: The number of pages before the shrink - * - * Return: 0 on success, negative -errno on error - * - * Unmap the shrunk pages from the GPU mapping. Note that the size of the region - * itself is unmodified as we still need to reserve the VA, only the page tables - * will be modified by this function. 
- */ -static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages); - struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment) { int zone; @@ -114,6 +77,9 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages #if defined(CONFIG_64BIT) if (kctx->is_compat) cpu_va_bits = 32; + else + /* force SAME_VA if a 64-bit client */ + *flags |= BASE_MEM_SAME_VA; #endif if (!kbase_check_alloc_flags(*flags)) { @@ -229,9 +195,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages if (*flags & BASE_MEM_PROT_CPU_WR) prot |= PROT_WRITE; - cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot, - MAP_SHARED, cookie); - + cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot, MAP_SHARED, + cookie); if (IS_ERR_VALUE(cpu_addr)) { kctx->pending_regions[cookie_nr] = NULL; kctx->cookies |= (1UL << cookie_nr); @@ -378,412 +343,12 @@ int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 * c return ret; } -/** - * kbase_mem_evictable_reclaim_count_objects - Count number of pages in the - * Ephemeral memory eviction list. - * @s: Shrinker - * @sc: Shrinker control - * - * Return: Number of pages which can be freed. - */ -static -unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, - struct shrink_control *sc) -{ - struct kbase_context *kctx; - struct kbase_mem_phy_alloc *alloc; - unsigned long pages = 0; - - kctx = container_of(s, struct kbase_context, reclaim); - - mutex_lock(&kctx->evict_lock); - - list_for_each_entry(alloc, &kctx->evict_list, evict_node) - pages += alloc->nents; - - mutex_unlock(&kctx->evict_lock); - return pages; -} - -/** - * kbase_mem_evictable_reclaim_scan_objects - Scan the Ephemeral memory eviction - * list for pages and try to reclaim them. - * @s: Shrinker - * @sc: Shrinker control - * - * Return: Number of pages freed (can be less then requested) or -1 if the - * shrinker failed to free pages in its pool. - * - * Note: - * This function accesses region structures without taking the region lock, - * this is required as the OOM killer can call the shrinker after the region - * lock has already been held. - * This is safe as we can guarantee that a region on the eviction list will - * not be freed (kbase_mem_free_region removes the allocation from the list - * before destroying it), or modified by other parts of the driver. - * The eviction list itself is guarded by the eviction lock and the MMU updates - * are protected by their own lock. - */ -static -unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s, - struct shrink_control *sc) -{ - struct kbase_context *kctx; - struct kbase_mem_phy_alloc *alloc; - struct kbase_mem_phy_alloc *tmp; - unsigned long freed = 0; - - kctx = container_of(s, struct kbase_context, reclaim); - mutex_lock(&kctx->evict_lock); - - list_for_each_entry_safe(alloc, tmp, &kctx->evict_list, evict_node) { - int err; - - err = kbase_mem_shrink_gpu_mapping(kctx, alloc->reg, - 0, alloc->nents); - if (err != 0) { - /* - * Failed to remove GPU mapping, tell the shrinker - * to stop trying to shrink our slab even though we - * have pages in it. - */ - freed = -1; - goto out_unlock; - } - - /* - * Update alloc->evicted before freeing the backing so the - * helper can determine that it needs to bypass the accounting - * and memory pool. 
- */ - alloc->evicted = alloc->nents; - - kbase_free_phy_pages_helper(alloc, alloc->evicted); - freed += alloc->evicted; - list_del_init(&alloc->evict_node); - - /* - * Inform the JIT allocator this region has lost backing - * as it might need to free the allocation. - */ - kbase_jit_backing_lost(alloc->reg); - - /* Enough pages have been freed so stop now */ - if (freed > sc->nr_to_scan) - break; - } -out_unlock: - mutex_unlock(&kctx->evict_lock); - - return freed; -} - -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) -static int kbase_mem_evictable_reclaim_shrink(struct shrinker *s, - struct shrink_control *sc) -{ - if (sc->nr_to_scan == 0) - return kbase_mem_evictable_reclaim_count_objects(s, sc); - - return kbase_mem_evictable_reclaim_scan_objects(s, sc); -} -#endif - -int kbase_mem_evictable_init(struct kbase_context *kctx) -{ - INIT_LIST_HEAD(&kctx->evict_list); - mutex_init(&kctx->evict_lock); - - /* Register shrinker */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) - kctx->reclaim.shrink = kbase_mem_evictable_reclaim_shrink; -#else - kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects; - kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects; -#endif - kctx->reclaim.seeks = DEFAULT_SEEKS; - /* Kernel versions prior to 3.1 : - * struct shrinker does not define batch */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) - kctx->reclaim.batch = 0; -#endif - register_shrinker(&kctx->reclaim); - return 0; -} - -void kbase_mem_evictable_deinit(struct kbase_context *kctx) -{ - unregister_shrinker(&kctx->reclaim); -} - -struct kbase_mem_zone_cache_entry { - /* List head used to link the cache entry to the memory allocation. */ - struct list_head zone_node; - /* The zone the cacheline is for. */ - struct zone *zone; - /* The number of pages in the allocation which belong to this zone. */ - u64 count; -}; - -static bool kbase_zone_cache_builder(struct kbase_mem_phy_alloc *alloc, - size_t start_offset) -{ - struct kbase_mem_zone_cache_entry *cache = NULL; - size_t i; - int ret = 0; - - for (i = start_offset; i < alloc->nents; i++) { - struct page *p = phys_to_page(alloc->pages[i]); - struct zone *zone = page_zone(p); - bool create = true; - - if (cache && (cache->zone == zone)) { - /* - * Fast path check as most of the time adjacent - * pages come from the same zone. - */ - create = false; - } else { - /* - * Slow path check, walk all the cache entries to see - * if we already know about this zone. - */ - list_for_each_entry(cache, &alloc->zone_cache, zone_node) { - if (cache->zone == zone) { - create = false; - break; - } - } - } - - /* This zone wasn't found in the cache, create an entry for it */ - if (create) { - cache = kmalloc(sizeof(*cache), GFP_KERNEL); - if (!cache) { - ret = -ENOMEM; - goto bail; - } - cache->zone = zone; - cache->count = 0; - list_add(&cache->zone_node, &alloc->zone_cache); - } - - cache->count++; - } - return 0; - -bail: - return ret; -} - -int kbase_zone_cache_update(struct kbase_mem_phy_alloc *alloc, - size_t start_offset) -{ - /* - * Bail if the zone cache is empty, only update the cache if it - * existed in the first place. 
- */ - if (list_empty(&alloc->zone_cache)) - return 0; - - return kbase_zone_cache_builder(alloc, start_offset); -} - -int kbase_zone_cache_build(struct kbase_mem_phy_alloc *alloc) -{ - /* Bail if the zone cache already exists */ - if (!list_empty(&alloc->zone_cache)) - return 0; - - return kbase_zone_cache_builder(alloc, 0); -} - -void kbase_zone_cache_clear(struct kbase_mem_phy_alloc *alloc) -{ - struct kbase_mem_zone_cache_entry *walker; - - while(!list_empty(&alloc->zone_cache)){ - walker = list_first_entry(&alloc->zone_cache, - struct kbase_mem_zone_cache_entry, zone_node); - list_del(&walker->zone_node); - kfree(walker); - } -} - -/** - * kbase_mem_evictable_mark_reclaim - Mark the pages as reclaimable. - * @alloc: The physical allocation - */ -static void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) -{ - struct kbase_context *kctx = alloc->imported.kctx; - struct kbase_mem_zone_cache_entry *zone_cache; - int __maybe_unused new_page_count; - int err; - - /* Attempt to build a zone cache of tracking */ - err = kbase_zone_cache_build(alloc); - if (err == 0) { - /* Bulk update all the zones */ - list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) { - zone_page_state_add(zone_cache->count, - zone_cache->zone, NR_SLAB_RECLAIMABLE); - } - } else { - /* Fall-back to page by page updates */ - int i; - - for (i = 0; i < alloc->nents; i++) { - struct page *p = phys_to_page(alloc->pages[i]); - struct zone *zone = page_zone(p); - - zone_page_state_add(1, zone, NR_SLAB_RECLAIMABLE); - } - } - - kbase_process_page_usage_dec(kctx, alloc->nents); - new_page_count = kbase_atomic_sub_pages(alloc->nents, - &kctx->used_pages); - kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages); - - kbase_tlstream_aux_pagesalloc( - (u32)kctx->id, - (u64)new_page_count); -} - -/** - * kbase_mem_evictable_unmark_reclaim - Mark the pages as no longer reclaimable. - * @alloc: The physical allocation - */ -static -void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) -{ - struct kbase_context *kctx = alloc->imported.kctx; - struct kbase_mem_zone_cache_entry *zone_cache; - int __maybe_unused new_page_count; - int err; - - new_page_count = kbase_atomic_add_pages(alloc->nents, - &kctx->used_pages); - kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages); - - /* Increase mm counters so that the allocation is accounted for - * against the process and thus is visible to the OOM killer, - * then remove it from the reclaimable accounting. */ - kbase_process_page_usage_inc(kctx, alloc->nents); - - /* Attempt to build a zone cache of tracking */ - err = kbase_zone_cache_build(alloc); - if (err == 0) { - /* Bulk update all the zones */ - list_for_each_entry(zone_cache, &alloc->zone_cache, zone_node) { - zone_page_state_add(-zone_cache->count, - zone_cache->zone, NR_SLAB_RECLAIMABLE); - } - } else { - /* Fall-back to page by page updates */ - int i; - - for (i = 0; i < alloc->nents; i++) { - struct page *p = phys_to_page(alloc->pages[i]); - struct zone *zone = page_zone(p); - - zone_page_state_add(-1, zone, NR_SLAB_RECLAIMABLE); - } - } - - kbase_tlstream_aux_pagesalloc( - (u32)kctx->id, - (u64)new_page_count); -} - -int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) -{ - struct kbase_context *kctx = gpu_alloc->imported.kctx; - int err; - - lockdep_assert_held(&kctx->reg_lock); - - /* This alloction can't already be on a list. 
*/ - WARN_ON(!list_empty(&gpu_alloc->evict_node)); - - /* - * Try to shrink the CPU mappings as required, if we fail then - * fail the process of making this allocation evictable. - */ - err = kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg, - 0, gpu_alloc->nents); - if (err) - return -EINVAL; - - /* - * Add the allocation to the eviction list, after this point the shrink - * can reclaim it. - */ - mutex_lock(&kctx->evict_lock); - list_add(&gpu_alloc->evict_node, &kctx->evict_list); - mutex_unlock(&kctx->evict_lock); - kbase_mem_evictable_mark_reclaim(gpu_alloc); - - gpu_alloc->reg->flags |= KBASE_REG_DONT_NEED; - return 0; -} - -bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) -{ - struct kbase_context *kctx = gpu_alloc->imported.kctx; - int err = 0; - - lockdep_assert_held(&kctx->reg_lock); - - /* - * First remove the allocation from the eviction list as it's no - * longer eligible for eviction. - */ - mutex_lock(&kctx->evict_lock); - list_del_init(&gpu_alloc->evict_node); - mutex_unlock(&kctx->evict_lock); - - if (gpu_alloc->evicted == 0) { - /* - * The backing is still present, update the VM stats as it's - * in use again. - */ - kbase_mem_evictable_unmark_reclaim(gpu_alloc); - } else { - /* If the region is still alive ... */ - if (gpu_alloc->reg) { - /* ... allocate replacement backing ... */ - err = kbase_alloc_phy_pages_helper(gpu_alloc, - gpu_alloc->evicted); - - /* - * ... and grow the mapping back to its - * pre-eviction size. - */ - if (!err) - err = kbase_mem_grow_gpu_mapping(kctx, - gpu_alloc->reg, - gpu_alloc->evicted, 0); - - gpu_alloc->evicted = 0; - } - } - - /* If the region is still alive remove the DONT_NEED attribute. */ - if (gpu_alloc->reg) - gpu_alloc->reg->flags &= ~KBASE_REG_DONT_NEED; - - return (err == 0); -} - int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask) { struct kbase_va_region *reg; int ret = -EINVAL; unsigned int real_flags = 0; unsigned int prev_flags = 0; - bool prev_needed, new_needed; KBASE_DEBUG_ASSERT(kctx); @@ -794,11 +359,11 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in flags &= mask; /* check for only supported flags */ - if (flags & ~(BASE_MEM_FLAGS_MODIFIABLE)) + if (flags & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL)) goto out; /* mask covers bits we don't support? */ - if (mask & ~(BASE_MEM_FLAGS_MODIFIABLE)) + if (mask & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL)) goto out; /* convert flags */ @@ -808,7 +373,6 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in real_flags |= KBASE_REG_SHARE_IN; /* now we can lock down the context, and find the region */ - down_write(&current->mm->mmap_sem); kbase_gpu_vm_lock(kctx); /* Validate the region */ @@ -816,28 +380,6 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in if (!reg || (reg->flags & KBASE_REG_FREE)) goto out_unlock; - /* Is the region being transitioning between not needed and needed? 
*/ - prev_needed = (KBASE_REG_DONT_NEED & reg->flags) == KBASE_REG_DONT_NEED; - new_needed = (BASE_MEM_DONT_NEED & flags) == BASE_MEM_DONT_NEED; - if (prev_needed != new_needed) { - /* Aliased allocations can't be made ephemeral */ - if (atomic_read(&reg->cpu_alloc->gpu_mappings) > 1) - goto out_unlock; - - if (new_needed) { - /* Only native allocations can be marked not needed */ - if (reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { - ret = -EINVAL; - goto out_unlock; - } - ret = kbase_mem_evictable_make(reg->gpu_alloc); - if (ret) - goto out_unlock; - } else { - kbase_mem_evictable_unmake(reg->gpu_alloc); - } - } - /* limit to imported memory */ if ((reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) && (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)) @@ -880,7 +422,6 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in out_unlock: kbase_gpu_vm_unlock(kctx); - up_write(&current->mm->mmap_sem); out: return ret; } @@ -1011,7 +552,6 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, in struct kbase_va_region *reg; struct dma_buf *dma_buf; struct dma_buf_attachment *dma_attachment; - bool shared_zone = false; dma_buf = dma_buf_get(fd); if (IS_ERR_OR_NULL(dma_buf)) @@ -1032,23 +572,15 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, in /* ignore SAME_VA */ *flags &= ~BASE_MEM_SAME_VA; - if (*flags & BASE_MEM_IMPORT_SHARED) - shared_zone = true; - #ifdef CONFIG_64BIT if (!kctx->is_compat) { - /* - * 64-bit tasks require us to reserve VA on the CPU that we use - * on the GPU. 
- */ - shared_zone = true; - } -#endif - - if (shared_zone) { + /* 64-bit tasks must MMAP anyway, but not expose this address to clients */ *flags |= BASE_MEM_NEED_MMAP; reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA); } else { +#else + if (1) { +#endif reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA); } @@ -1086,7 +618,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, in /* no read or write permission given on import, only on run do we give the right permissions */ - reg->gpu_alloc->type = KBASE_MEM_TYPE_IMPORTED_UMM; + reg->gpu_alloc->type = BASE_MEM_IMPORT_TYPE_UMM; reg->gpu_alloc->imported.umm.sgt = NULL; reg->gpu_alloc->imported.umm.dma_buf = dma_buf; reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment; @@ -1115,7 +647,6 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( struct kbase_va_region *reg; long faulted_pages; int zone = KBASE_REG_ZONE_CUSTOM_VA; - bool shared_zone = false; *va_pages = (PAGE_ALIGN(address + size) >> PAGE_SHIFT) - PFN_DOWN(address); @@ -1129,24 +660,14 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( /* SAME_VA generally not supported with imported memory (no known use cases) */ *flags &= ~BASE_MEM_SAME_VA; - if (*flags & BASE_MEM_IMPORT_SHARED) - shared_zone = true; - #ifdef CONFIG_64BIT if (!kctx->is_compat) { - /* - * 64-bit tasks require us to reserve VA on the CPU that we use - * on the GPU. - */ - shared_zone = true; - } -#endif - - if (shared_zone) { + /* 64-bit tasks must MMAP anyway, but not expose this address to + * clients */ *flags |= BASE_MEM_NEED_MMAP; zone = KBASE_REG_ZONE_SAME_VA; } - +#endif reg = kbase_alloc_free_region(kctx, 0, *va_pages, zone); if (!reg) @@ -1184,13 +705,8 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( /* We can't really store the page list because that would involve */ /* keeping the pages pinned - instead we pin/unpin around the job */ /* (as part of the external resources handling code) */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) faulted_pages = get_user_pages(current, current->mm, address, *va_pages, reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL); -#else - faulted_pages = get_user_pages(address, *va_pages, - reg->flags & KBASE_REG_GPU_WR, 0, NULL, NULL); -#endif up_read(&current->mm->mmap_sem); if (faulted_pages != *va_pages) @@ -1201,8 +717,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( reg->gpu_alloc->imported.user_buf.nr_pages = faulted_pages; reg->gpu_alloc->imported.user_buf.pages = kmalloc_array(faulted_pages, sizeof(struct page *), GFP_KERNEL); - reg->gpu_alloc->imported.user_buf.mm = current->mm; - atomic_inc(&current->mm->mm_count); + reg->gpu_alloc->imported.user_buf.owner = current; if (!reg->gpu_alloc->imported.user_buf.pages) goto no_page_array; @@ -1302,9 +817,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, /* validate and add src handles */ for (i = 0; i < nents; i++) { - if (ai[i].handle.basep.handle < BASE_MEM_FIRST_FREE_ADDRESS) { - if (ai[i].handle.basep.handle != - BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE + if (ai[i].handle < BASE_MEM_FIRST_FREE_ADDRESS) { + if (ai[i].handle != BASE_MEM_WRITE_ALLOC_PAGES_HANDLE) goto bad_handle; /* unsupported magic handle */ if (!ai[i].length) goto bad_handle; /* must be > 0 */ @@ -1316,17 +830,13 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, struct kbase_va_region *aliasing_reg; struct kbase_mem_phy_alloc *alloc; - aliasing_reg = kbase_region_tracker_find_region_base_address( - kctx, - (ai[i].handle.basep.handle >> PAGE_SHIFT) << PAGE_SHIFT); + aliasing_reg = kbase_region_tracker_find_region_base_address(kctx, (ai[i].handle >> PAGE_SHIFT) << PAGE_SHIFT); /* validate found region */ if (!aliasing_reg) goto bad_handle; /* Not found */ if (aliasing_reg->flags & KBASE_REG_FREE) goto bad_handle; /* Free region */ - if (aliasing_reg->flags & KBASE_REG_DONT_NEED) - goto bad_handle; /* Ephemeral region */ if (!aliasing_reg->gpu_alloc) goto bad_handle; /* No alloc */ if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) @@ -1548,7 +1058,6 @@ static int zap_range_nolock(struct mm_struct *mm, int err = -EINVAL; /* in case end < start */ while (start < end) { - unsigned long local_start; unsigned long local_end; vma = find_vma_intersection(mm, start, end); @@ -1559,17 +1068,12 @@ static int zap_range_nolock(struct mm_struct *mm, if (vma->vm_ops != vm_ops) goto try_next; - local_start = vma->vm_start; - - if (start > local_start) - local_start = start; - local_end = vma->vm_end; if (end < local_end) local_end = end; - err = zap_vma_ptes(vma, local_start, local_end - local_start); + err = zap_vma_ptes(vma, start, local_end - start); if (unlikely(err)) break; @@ -1581,98 +1085,19 @@ static int zap_range_nolock(struct mm_struct *mm, return err; } -int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages) -{ - phys_addr_t *phy_pages; - u64 delta = 
new_pages - old_pages; - int ret = 0; - - lockdep_assert_held(&kctx->reg_lock); - - /* Map the new pages into the GPU */ - phy_pages = kbase_get_gpu_phy_pages(reg); - ret = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages, - phy_pages + old_pages, delta, reg->flags); - - return ret; -} - -static int kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages) -{ - struct kbase_mem_phy_alloc *cpu_alloc = reg->cpu_alloc; - struct kbase_cpu_mapping *mapping; - int err; - - lockdep_assert_held(&kctx->process_mm->mmap_sem); - - list_for_each_entry(mapping, &cpu_alloc->mappings, mappings_list) { - unsigned long mapping_size; - - mapping_size = (mapping->vm_end - mapping->vm_start) - >> PAGE_SHIFT; - - /* is this mapping affected ?*/ - if ((mapping->page_off + mapping_size) > new_pages) { - unsigned long first_bad = 0; - - if (new_pages > mapping->page_off) - first_bad = new_pages - mapping->page_off; - - err = zap_range_nolock(current->mm, - &kbase_vm_ops, - mapping->vm_start + - (first_bad << PAGE_SHIFT), - mapping->vm_end); - - WARN(err, - "Failed to zap VA range (0x%lx - 0x%lx);\n", - mapping->vm_start + - (first_bad << PAGE_SHIFT), - mapping->vm_end - ); - - /* The zap failed, give up and exit */ - if (err) - goto failed; - } - } - - return 0; - -failed: - return err; -} - -static int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages) -{ - u64 delta = old_pages - new_pages; - int ret = 0; - - ret = kbase_mmu_teardown_pages(kctx, - reg->start_pfn + new_pages, delta); - - return ret; -} - int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason) { u64 old_pages; u64 delta; int res = -EINVAL; struct kbase_va_region *reg; - bool read_locked = false; + phys_addr_t *phy_pages; KBASE_DEBUG_ASSERT(kctx); KBASE_DEBUG_ASSERT(failure_reason); KBASE_DEBUG_ASSERT(gpu_addr != 0); - down_write(¤t->mm->mmap_sem); + down_read(¤t->mm->mmap_sem); kbase_gpu_vm_lock(kctx); /* Validate the region */ @@ -1706,11 +1131,6 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE; goto out_unlock; } - /* can't grow regions which are ephemeral */ - if (reg->flags & KBASE_REG_DONT_NEED) { - *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE; - goto out_unlock; - } if (new_pages == reg->gpu_alloc->nents) { /* no change */ @@ -1718,17 +1138,14 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en goto out_unlock; } + phy_pages = kbase_get_gpu_phy_pages(reg); old_pages = kbase_reg_current_backed_size(reg); - if (new_pages > old_pages) { - delta = new_pages - old_pages; - /* - * No update to the mm so downgrade the writer lock to a read - * lock so other readers aren't blocked after this point. - */ - downgrade_write(¤t->mm->mmap_sem); - read_locked = true; + if (new_pages > old_pages) { + /* growing */ + int err; + delta = new_pages - old_pages; /* Allocate some more pages */ if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) { *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM; @@ -1743,15 +1160,9 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en goto out_unlock; } } - - /* No update required for CPU mappings, that's done on fault. */ - - /* Update GPU mapping. 
*/ - res = kbase_mem_grow_gpu_mapping(kctx, reg, - new_pages, old_pages); - - /* On error free the new pages */ - if (res) { + err = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages, + phy_pages + old_pages, delta, reg->flags); + if (err) { kbase_free_phy_pages_helper(reg->cpu_alloc, delta); if (reg->cpu_alloc != reg->gpu_alloc) kbase_free_phy_pages_helper(reg->gpu_alloc, @@ -1760,35 +1171,60 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, en goto out_unlock; } } else { - delta = old_pages - new_pages; + /* shrinking */ + struct kbase_cpu_mapping *mapping; + int err; - /* Update all CPU mapping(s) */ - res = kbase_mem_shrink_cpu_mapping(kctx, reg, - new_pages, old_pages); - if (res) { - *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM; - goto out_unlock; + /* first, unmap from any mappings affected */ + list_for_each_entry(mapping, ®->cpu_alloc->mappings, mappings_list) { + unsigned long mapping_size = (mapping->vm_end - mapping->vm_start) >> PAGE_SHIFT; + + /* is this mapping affected ?*/ + if ((mapping->page_off + mapping_size) > new_pages) { + unsigned long first_bad = 0; + int zap_res; + + if (new_pages > mapping->page_off) + first_bad = new_pages - mapping->page_off; + + zap_res = zap_range_nolock(current->mm, + &kbase_vm_ops, + mapping->vm_start + + (first_bad << PAGE_SHIFT), + mapping->vm_end); + WARN(zap_res, + "Failed to zap VA range (0x%lx - 0x%lx);\n", + mapping->vm_start + + (first_bad << PAGE_SHIFT), + mapping->vm_end + ); + } } - /* Update the GPU mapping */ - res = kbase_mem_shrink_gpu_mapping(kctx, reg, - new_pages, old_pages); - if (res) { + /* Free some pages */ + delta = old_pages - new_pages; + err = kbase_mmu_teardown_pages(kctx, reg->start_pfn + new_pages, + delta); + if (err) { *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM; goto out_unlock; } - +#ifndef CONFIG_MALI_NO_MALI + if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) { + /* Wait for GPU to flush write buffer before freeing physical pages */ + kbase_wait_write_flush(kctx); + } +#endif kbase_free_phy_pages_helper(reg->cpu_alloc, delta); if (reg->cpu_alloc != reg->gpu_alloc) kbase_free_phy_pages_helper(reg->gpu_alloc, delta); } + res = 0; + out_unlock: kbase_gpu_vm_unlock(kctx); - if (read_locked) - up_read(¤t->mm->mmap_sem); - else - up_write(¤t->mm->mmap_sem); + up_read(¤t->mm->mmap_sem); return res; } @@ -1860,10 +1296,6 @@ static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (map->page_off + rel_pgoff >= map->alloc->nents) goto locked_bad_fault; - /* Fault on access to DONT_NEED regions */ - if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED)) - goto locked_bad_fault; - /* insert all valid pages from the fault location */ for (i = rel_pgoff; i < MIN((vma->vm_end - vma->vm_start) >> PAGE_SHIFT, @@ -2231,8 +1663,8 @@ int kbase_mmap(struct file *file, struct vm_area_struct *vma) rcu_read_unlock(); switch (vma->vm_pgoff) { - case PFN_DOWN(BASEP_MEM_INVALID_HANDLE): - case PFN_DOWN(BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE): + case PFN_DOWN(BASE_MEM_INVALID_HANDLE): + case PFN_DOWN(BASE_MEM_WRITE_ALLOC_PAGES_HANDLE): /* Illegal handle for direct map */ err = -EINVAL; goto out_unlock; @@ -2443,8 +1875,8 @@ int kbase_mmap(struct file *file, struct vm_area_struct *vma) KBASE_EXPORT_TEST_API(kbase_mmap); -void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, - unsigned long prot_request, struct kbase_vmap_struct *map) +void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, + struct 
kbase_vmap_struct *map) { struct kbase_va_region *reg; unsigned long page_index; @@ -2479,14 +1911,6 @@ void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, if (page_index + page_count > kbase_reg_current_backed_size(reg)) goto out_unlock; - if (reg->flags & KBASE_REG_DONT_NEED) - goto out_unlock; - - /* check access permissions can be satisfied - * Intended only for checking KBASE_REG_{CPU,GPU}_{RD,WR} */ - if ((reg->flags & prot_request) != prot_request) - goto out_unlock; - page_array = kbase_get_cpu_phy_pages(reg); if (!page_array) goto out_unlock; @@ -2503,9 +1927,6 @@ void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, /* Map uncached */ prot = pgprot_writecombine(prot); } - /* Note: enforcing a RO prot_request onto prot is not done, since: - * - CPU-arch-specific integration required - * - kbase_vmap() requires no access checks to be made/enforced */ cpu_addr = vmap(pages, page_count, VM_MAP, prot); @@ -2524,12 +1945,6 @@ void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, map->is_cached = (reg->flags & KBASE_REG_CPU_CACHED) != 0; sync_needed = map->is_cached; -#ifdef CONFIG_MALI_COH_KERN - /* kernel can use coherent memory if supported */ - if (kctx->kbdev->system_coherency == COHERENCY_ACE) - sync_needed = false; -#endif - if (sync_needed) { /* Sync first page */ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size); @@ -2564,17 +1979,6 @@ void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, kbase_gpu_vm_unlock(kctx); return NULL; } - -void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, - struct kbase_vmap_struct *map) -{ - /* 0 is specified for prot_request to indicate no access checks should - * be made. - * - * As mentioned in kbase_vmap_prot() this means that a kernel-side - * CPU-RO mapping is not enforced to allow this to work */ - return kbase_vmap_prot(kctx, gpu_addr, size, 0u, map); -} KBASE_EXPORT_TEST_API(kbase_vmap); void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) @@ -2582,11 +1986,6 @@ void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK); bool sync_needed = map->is_cached; vunmap(addr); -#ifdef CONFIG_MALI_COH_KERN - /* kernel can use coherent memory if supported */ - if (kctx->kbdev->system_coherency == COHERENCY_ACE) - sync_needed = false; -#endif if (sync_needed) { off_t offset = (uintptr_t)map->addr & ~PAGE_MASK; size_t size = map->size; diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h index 6471747a7dc5d..6a139fd702346 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010, 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -42,72 +42,6 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason); int kbase_mmap(struct file *file, struct vm_area_struct *vma); -/** - * kbase_mem_evictable_init - Initialize the Ephemeral memory the eviction - * mechanism. - * @kctx: The kbase context to initialize. 
- * - * Return: Zero on success or -errno on failure. - */ -int kbase_mem_evictable_init(struct kbase_context *kctx); - -/** - * kbase_mem_evictable_deinit - De-initialize the Ephemeral memory eviction - * mechanism. - * @kctx: The kbase context to de-initialize. - */ -void kbase_mem_evictable_deinit(struct kbase_context *kctx); - -/** - * kbase_mem_grow_gpu_mapping - Grow the GPU mapping of an allocation - * @kctx: Context the region belongs to - * @reg: The GPU region - * @new_pages: The number of pages after the grow - * @old_pages: The number of pages before the grow - * - * Return: 0 on success, -errno on error. - * - * Expand the GPU mapping to encompass the new psychical pages which have - * been added to the allocation. - * - * Note: Caller must be holding the region lock. - */ -int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, - struct kbase_va_region *reg, - u64 new_pages, u64 old_pages); - -/** - * kbase_mem_evictable_make - Make a physical allocation eligible for eviction - * @gpu_alloc: The physical allocation to make evictable - * - * Return: 0 on success, -errno on error. - * - * Take the provided region and make all the physical pages within it - * reclaimable by the kernel, updating the per-process VM stats as well. - * Remove any CPU mappings (as these can't be removed in the shrinker callback - * as mmap_sem might already be taken) but leave the GPU mapping intact as - * and until the shrinker reclaims the allocation. - * - * Note: Must be called with the region lock of the containing context. - */ -int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc); - -/** - * kbase_mem_evictable_unmake - Remove a physical allocations eligibility for - * eviction. - * @alloc: The physical allocation to remove eviction eligibility from. - * - * Return: True if the allocation had its backing restored and false if - * it hasn't. - * - * Make the physical pages in the region no longer reclaimable and update the - * per-process stats, if the shrinker has already evicted the memory then - * re-allocate it if the region is still alive. - * - * Note: Must be called with the region lock of the containing context. - */ -bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *alloc); - struct kbase_vmap_struct { u64 gpu_addr; struct kbase_mem_phy_alloc *cpu_alloc; @@ -118,83 +52,8 @@ struct kbase_vmap_struct { size_t size; bool is_cached; }; - - -/** - * kbase_vmap_prot - Map a GPU VA range into the kernel safely, only if the - * requested access permissions are supported - * @kctx: Context the VA range belongs to - * @gpu_addr: Start address of VA range - * @size: Size of VA range - * @prot_request: Flags indicating how the caller will then access the memory - * @map: Structure to be given to kbase_vunmap() on freeing - * - * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error - * - * Map a GPU VA Range into the kernel. The VA range must be contained within a - * GPU memory region. Appropriate CPU cache-flushing operations are made as - * required, dependent on the CPU mapping for the memory region. - * - * This is safer than using kmap() on the pages directly, - * because the pages here are refcounted to prevent freeing (and hence reuse - * elsewhere in the system) until an kbase_vunmap() - * - * The flags in @prot_request should use KBASE_REG_{CPU,GPU}_{RD,WR}, to check - * whether the region should allow the intended access, and return an error if - * disallowed. 
This is essential for security of imported memory, particularly - * a user buf from SHM mapped into the process as RO. In that case, write - * access must be checked if the intention is for kernel to write to the - * memory. - * - * The checks are also there to help catch access errors on memory where - * security is not a concern: imported memory that is always RW, and memory - * that was allocated and owned by the process attached to @kctx. In this case, - * it helps to identify memory that was was mapped with the wrong access type. - * - * Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases - * where either the security of memory is solely dependent on those flags, or - * when userspace code was expecting only the GPU to access the memory (e.g. HW - * workarounds). - * - */ -void *kbase_vmap_prot(struct kbase_context *kctx, u64 gpu_addr, size_t size, - unsigned long prot_request, struct kbase_vmap_struct *map); - -/** - * kbase_vmap - Map a GPU VA range into the kernel safely - * @kctx: Context the VA range belongs to - * @gpu_addr: Start address of VA range - * @size: Size of VA range - * @map: Structure to be given to kbase_vunmap() on freeing - * - * Return: Kernel-accessible CPU pointer to the VA range, or NULL on error - * - * Map a GPU VA Range into the kernel. The VA range must be contained within a - * GPU memory region. Appropriate CPU cache-flushing operations are made as - * required, dependent on the CPU mapping for the memory region. - * - * This is safer than using kmap() on the pages directly, - * because the pages here are refcounted to prevent freeing (and hence reuse - * elsewhere in the system) until an kbase_vunmap() - * - * kbase_vmap_prot() should be used in preference, since kbase_vmap() makes no - * checks to ensure the security of e.g. imported user bufs from RO SHM. - */ void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, struct kbase_vmap_struct *map); - -/** - * kbase_vunmap - Unmap a GPU VA range from the kernel - * @kctx: Context the VA range belongs to - * @map: Structure describing the mapping from the corresponding kbase_vmap() - * call - * - * Unmaps a GPU VA range from the kernel, given its @map structure obtained - * from kbase_vmap(). Appropriate CPU cache-flushing operations are made as - * required, dependent on the CPU mapping for the memory region. - * - * The reference taken on pages during kbase_vmap() is released. - */ void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map); /** @brief Allocate memory from kernel space and map it onto the GPU diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c index 957061893b005..153cd4efac49f 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,9 +43,6 @@ int __init kbase_carveout_mem_reserve(phys_addr_t size) kbase_mem_pool_max_size(pool), \ ##__VA_ARGS__) -#define NOT_DIRTY false -#define NOT_RECLAIMED false - static inline void kbase_mem_pool_lock(struct kbase_mem_pool *pool) { spin_lock(&pool->pool_lock); @@ -264,8 +261,6 @@ static size_t kbase_mem_pool_grow(struct kbase_mem_pool *pool, for (i = 0; i < nr_to_grow && !kbase_mem_pool_is_full(pool); i++) { p = kbase_mem_pool_alloc_page(pool); - if (!p) - break; kbase_mem_pool_add(pool, p); } @@ -510,7 +505,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages, return 0; err_rollback: - kbase_mem_pool_free_pages(pool, i, pages, NOT_DIRTY, NOT_RECLAIMED); + kbase_mem_pool_free_pages(pool, i, pages, false); return err; } @@ -553,7 +548,7 @@ static void kbase_mem_pool_add_array(struct kbase_mem_pool *pool, } void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, - phys_addr_t *pages, bool dirty, bool reclaimed) + phys_addr_t *pages, bool dirty) { struct kbase_mem_pool *next_pool = pool->next_pool; struct page *p; @@ -563,24 +558,22 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, pool_dbg(pool, "free_pages(%zu):\n", nr_pages); - if (!reclaimed) { - /* Add to this pool */ - nr_to_pool = kbase_mem_pool_capacity(pool); - nr_to_pool = min(nr_pages, nr_to_pool); + /* Add to this pool */ + nr_to_pool = kbase_mem_pool_capacity(pool); + nr_to_pool = min(nr_pages, nr_to_pool); - kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty); + kbase_mem_pool_add_array(pool, nr_to_pool, pages, false, dirty); - i += nr_to_pool; + i += nr_to_pool; - if (i != nr_pages && next_pool) { - /* Spill to next pool (may overspill) */ - nr_to_pool = kbase_mem_pool_capacity(next_pool); - nr_to_pool = min(nr_pages - i, nr_to_pool); + if (i != nr_pages && next_pool) { + /* Spill to next pool (may overspill) */ + nr_to_pool = kbase_mem_pool_capacity(next_pool); + nr_to_pool = min(nr_pages - i, nr_to_pool); - kbase_mem_pool_add_array(next_pool, nr_to_pool, - pages + i, true, dirty); - i += nr_to_pool; - } + kbase_mem_pool_add_array(next_pool, nr_to_pool, pages + i, + true, dirty); + i += nr_to_pool; } /* Free any remaining pages to kernel */ @@ -589,10 +582,6 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, continue; p = phys_to_page(pages[i]); - if (reclaimed) - zone_page_state_add(-1, page_zone(p), - NR_SLAB_RECLAIMABLE); - kbase_mem_pool_free_page(pool, p); pages[i] = 0; } diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c index 03594102f7ef3..0b19d05c46e80 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -15,7 +15,7 @@ -#include +#include #ifdef CONFIG_DEBUG_FS diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h index a1dc2e0b165b9..9555197f305cc 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -26,6 +26,7 @@ #ifndef _KBASE_MEM_PROFILE_DEBUGFS_H #define _KBASE_MEM_PROFILE_DEBUGFS_H +#include #include #include diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/drivers/gpu/arm/midgard/mali_kbase_mmu.c index 48d53723a9b40..d81ef593e9288 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mmu.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -30,8 +30,9 @@ #if defined(CONFIG_MALI_GATOR_SUPPORT) #include #endif +#if defined(CONFIG_MALI_MIPE_ENABLED) #include -#include +#endif #include #define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) @@ -40,31 +41,9 @@ #include #include #include -#include #define KBASE_MMU_PAGE_ENTRIES 512 -/** - * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches. - * @kctx: The KBase context. - * @vpfn: The virtual page frame number to start the flush on. - * @nr: The number of pages to flush. - * @sync: Set if the operation should be synchronous or not. - * - * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs. - * - * If sync is not set then transactions still in flight when the flush is issued - * may use the old page tables and the data they write will not be written out - * to memory, this function returns after the flush has been issued but - * before all accesses which might effect the flushed region have completed. - * - * If sync is set then accesses in the flushed region will be drained - * before data is flush and invalidated through L1, L2 and into memory, - * after which point this function will return. - */ -static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, - u64 vpfn, size_t nr, bool sync); - /** * kbase_mmu_sync_pgd - sync page directory to memory * @kbdev: Device pointer. 
@@ -77,12 +56,8 @@ static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, dma_addr_t handle, size_t size) { - /* If page table is not coherent then ensure the gpu can read - * the pages from memory - */ - if (kbdev->system_coherency != COHERENCY_ACE) - dma_sync_single_for_device(kbdev->dev, handle, size, - DMA_TO_DEVICE); + + dma_sync_single_for_device(kbdev->dev, handle, size, DMA_TO_DEVICE); } /* @@ -161,18 +136,6 @@ void page_fault_worker(struct work_struct *data) dev_warn(kbdev->dev, "Access flag unexpectedly set"); goto fault_done; -#ifdef CONFIG_MALI_GPU_MMU_AARCH64 - case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT: - - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Address size fault"); - goto fault_done; - - case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT: - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Memory attributes fault"); - goto fault_done; -#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ default: kbase_mmu_report_fault_and_kill(kctx, faulting_as, @@ -201,13 +164,6 @@ void page_fault_worker(struct work_struct *data) goto fault_done; } - if ((region->flags & KBASE_REG_DONT_NEED)) { - kbase_gpu_vm_unlock(kctx); - kbase_mmu_report_fault_and_kill(kctx, faulting_as, - "Don't need memory can't be grown"); - goto fault_done; - } - /* find the size we need to grow it by */ /* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address * validating the fault_adress to be within a size_t from the start_pfn */ @@ -277,27 +233,19 @@ void page_fault_worker(struct work_struct *data) if (grown) { - u64 pfn_offset; u32 op; /* alloc success */ KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages); + /* AS transaction begin */ + mutex_lock(&faulting_as->transaction_mutex); + /* set up the new pages */ - pfn_offset = kbase_reg_current_backed_size(region) - new_pages; - /* - * Note: - * Issuing an MMU operation will unlock the MMU and cause the - * translation to be replayed. If the page insertion fails then - * rather then trying to continue the context should be killed - * so the no_flush version of insert_pages is used which allows - * us to unlock the MMU as we see fit. 
- */ - err = kbase_mmu_insert_pages_no_flush(kctx, - region->start_pfn + pfn_offset, - &kbase_get_gpu_phy_pages(region)[pfn_offset], - new_pages, region->flags); + err = kbase_mmu_insert_pages(kctx, region->start_pfn + kbase_reg_current_backed_size(region) - new_pages, &kbase_get_gpu_phy_pages(region)[kbase_reg_current_backed_size(region) - new_pages], new_pages, region->flags); if (err) { + /* failed to insert pages, handle as a normal PF */ + mutex_unlock(&faulting_as->transaction_mutex); kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); if (region->gpu_alloc != region->cpu_alloc) kbase_free_phy_pages_helper(region->cpu_alloc, @@ -311,10 +259,9 @@ void page_fault_worker(struct work_struct *data) #if defined(CONFIG_MALI_GATOR_SUPPORT) kbase_trace_mali_page_fault_insert_pages(as_no, new_pages); #endif +#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_aux_pagefault(kctx->id, (u64)new_pages); - - /* AS transaction begin */ - mutex_lock(&faulting_as->transaction_mutex); +#endif /* flush L2 and unlock the VA (resumes the MMU) */ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367)) @@ -377,9 +324,11 @@ phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx) if (!p) goto sub_pages; +#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_aux_pagesalloc( (u32)kctx->id, (u64)new_page_count); +#endif page = kmap(p); if (NULL == page) @@ -416,7 +365,7 @@ static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, KBASE_DEBUG_ASSERT(pgd); KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->mmu_lock); + lockdep_assert_held(&kctx->reg_lock); /* * Architecture spec defines level-0 as being the top-most. @@ -457,9 +406,8 @@ static phys_addr_t mmu_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn) phys_addr_t pgd; int l; - lockdep_assert_held(&kctx->mmu_lock); - pgd = kctx->pgd; + for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) { pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l); /* Handle failure condition */ @@ -480,7 +428,7 @@ static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *k KBASE_DEBUG_ASSERT(pgd); KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->mmu_lock); + lockdep_assert_held(&kctx->reg_lock); /* * Architecture spec defines level-0 as being the top-most. 
@@ -505,8 +453,6 @@ static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context phys_addr_t pgd; int l; - lockdep_assert_held(&kctx->mmu_lock); - pgd = kctx->pgd; for (l = MIDGARD_MMU_TOPLEVEL; l < MIDGARD_MMU_BOTTOMLEVEL; l++) { @@ -530,7 +476,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vp /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); - lockdep_assert_held(&kctx->mmu_lock); + lockdep_assert_held(&kctx->reg_lock); mmu_mode = kctx->kbdev->mmu_mode; @@ -578,28 +524,22 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, bool recover_required = false; u64 recover_vpfn = vpfn; size_t recover_count = 0; - size_t remain = nr; - int err; KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(0 != vpfn); /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); - /* Early out if there is nothing to do */ - if (nr == 0) - return 0; - - mutex_lock(&kctx->mmu_lock); + lockdep_assert_held(&kctx->reg_lock); - while (remain) { + while (nr) { unsigned int i; unsigned int index = vpfn & 0x1FF; unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; struct page *p; - if (count > remain) - count = remain; + if (count > nr) + count = nr; /* * Repeatedly calling mmu_get_bottom_pte() is clearly @@ -618,8 +558,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, recover_vpfn, recover_count); } - err = -EINVAL; - goto fail_unlock; + return -EINVAL; } p = pfn_to_page(PFN_DOWN(pgd)); @@ -633,8 +572,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, recover_vpfn, recover_count); } - err = -ENOMEM; - goto fail_unlock; + return -ENOMEM; } for (i = 0; i < count; i++) { @@ -646,7 +584,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, } vpfn += count; - remain -= count; + nr -= count; kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p) + (index * sizeof(u64)), @@ -659,17 +597,13 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, recover_required = true; recover_count += count; } - mutex_unlock(&kctx->mmu_lock); - kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); return 0; - -fail_unlock: - mutex_unlock(&kctx->mmu_lock); - kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); - return err; } -int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn, +/* + * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' + */ +int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags) { @@ -680,28 +614,22 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn, bool recover_required = false; u64 recover_vpfn = vpfn; size_t recover_count = 0; - size_t remain = nr; - int err; KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(0 != vpfn); /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); - /* Early out if there is nothing to do */ - if (nr == 0) - return 0; - - mutex_lock(&kctx->mmu_lock); + lockdep_assert_held(&kctx->reg_lock); - while (remain) { + while (nr) { unsigned int i; unsigned int index = vpfn & 0x1FF; unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; struct page *p; - if (count > remain) - count = remain; + if (count > nr) + count = nr; /* * Repeatedly calling mmu_get_bottom_pte() is clearly @@ -720,8 +648,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn, recover_vpfn, recover_count); } - err = -EINVAL; - goto fail_unlock; 
+ return -EINVAL; } p = pfn_to_page(PFN_DOWN(pgd)); @@ -735,8 +662,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn, recover_vpfn, recover_count); } - err = -ENOMEM; - goto fail_unlock; + return -ENOMEM; } for (i = 0; i < count; i++) { @@ -749,7 +675,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn, phys += count; vpfn += count; - remain -= count; + nr -= count; kbase_mmu_sync_pgd(kctx->kbdev, kbase_dma_addr(p) + (index * sizeof(u64)), @@ -762,209 +688,81 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_context *kctx, u64 vpfn, recover_required = true; recover_count += count; } - - mutex_unlock(&kctx->mmu_lock); return 0; - -fail_unlock: - mutex_unlock(&kctx->mmu_lock); - return err; -} - -/* - * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' - */ -int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, - phys_addr_t *phys, size_t nr, - unsigned long flags) -{ - int err; - - err = kbase_mmu_insert_pages_no_flush(kctx, vpfn, phys, nr, flags); - kbase_mmu_flush_invalidate(kctx, vpfn, nr, false); - return err; } KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); /** - * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches - * without retaining the kbase context. - * @kctx: The KBase context. - * @vpfn: The virtual page frame number to start the flush on. - * @nr: The number of pages to flush. - * @sync: Set if the operation should be synchronous or not. + * This function is responsible for validating the MMU PTs + * triggering reguired flushes. * - * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any - * other locking. + * * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is + * currently scheduled into the runpool, and so potentially uses a lot of locks. + * These locks must be taken in the correct order with respect to others + * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more + * information. */ -static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx, - u64 vpfn, size_t nr, bool sync) -{ - struct kbase_device *kbdev = kctx->kbdev; - int err; - u32 op; - - /* Early out if there is nothing to do */ - if (nr == 0) - return; - - if (sync) - op = AS_COMMAND_FLUSH_MEM; - else - op = AS_COMMAND_FLUSH_PT; - - err = kbase_mmu_hw_do_operation(kbdev, - &kbdev->as[kctx->as_nr], - kctx, vpfn, nr, op, 0); -#if KBASE_GPU_RESET_EN - if (err) { - /* Flush failed to complete, assume the - * GPU has hung and perform a reset to - * recover */ - dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); - - if (kbase_prepare_to_reset_gpu_locked(kbdev)) - kbase_reset_gpu_locked(kbdev); - } -#endif /* KBASE_GPU_RESET_EN */ - -#ifndef CONFIG_MALI_NO_MALI - /* - * As this function could be called in interrupt context the sync - * request can't block. Instead log the request and the next flush - * request will pick it up. 
- */ - if ((!err) && sync && - kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) - atomic_set(&kctx->drain_pending, 1); -#endif /* !CONFIG_MALI_NO_MALI */ -} - -static void kbase_mmu_flush_invalidate(struct kbase_context *kctx, - u64 vpfn, size_t nr, bool sync) +static void kbase_mmu_flush(struct kbase_context *kctx, u64 vpfn, size_t nr) { struct kbase_device *kbdev; bool ctx_is_in_runpool; -#ifndef CONFIG_MALI_NO_MALI - bool drain_pending = false; - if (atomic_xchg(&kctx->drain_pending, 0)) - drain_pending = true; -#endif /* !CONFIG_MALI_NO_MALI */ - - /* Early out if there is nothing to do */ - if (nr == 0) - return; + KBASE_DEBUG_ASSERT(NULL != kctx); kbdev = kctx->kbdev; + + /* We must flush if we're currently running jobs. At the very least, we need to retain the + * context to ensure it doesn't schedule out whilst we're trying to flush it */ ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx); if (ctx_is_in_runpool) { KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - if (!kbase_pm_context_active_handle_suspend(kbdev, - KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { - int err; - u32 op; - - /* AS transaction begin */ - mutex_lock(&kbdev->as[ - kctx->as_nr].transaction_mutex); - - if (sync) - op = AS_COMMAND_FLUSH_MEM; - else - op = AS_COMMAND_FLUSH_PT; - - err = kbase_mmu_hw_do_operation(kbdev, - &kbdev->as[kctx->as_nr], - kctx, vpfn, nr, op, 0); - + /* Second level check is to try to only do this when jobs are running. The refcount is + * a heuristic for this. */ + if (kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr].as_busy_refcount >= 2) { + if (!kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + int ret; + u32 op; + + /* AS transaction begin */ + mutex_lock(&kbdev->as[ + kctx->as_nr].transaction_mutex); + + if (kbase_hw_has_issue(kbdev, + BASE_HW_ISSUE_6367)) + op = AS_COMMAND_FLUSH; + else + op = AS_COMMAND_FLUSH_MEM; + + ret = kbase_mmu_hw_do_operation(kbdev, + &kbdev->as[kctx->as_nr], + kctx, vpfn, nr, + op, 0); #if KBASE_GPU_RESET_EN - if (err) { - /* Flush failed to complete, assume the - * GPU has hung and perform a reset to - * recover */ - dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n"); - - if (kbase_prepare_to_reset_gpu(kbdev)) - kbase_reset_gpu(kbdev); - } + if (ret) { + /* Flush failed to complete, assume the + * GPU has hung and perform a reset to + * recover */ + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n"); + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); + } #endif /* KBASE_GPU_RESET_EN */ - mutex_unlock(&kbdev->as[ - kctx->as_nr].transaction_mutex); - /* AS transaction end */ + mutex_unlock(&kbdev->as[ + kctx->as_nr].transaction_mutex); + /* AS transaction end */ -#ifndef CONFIG_MALI_NO_MALI - /* - * The transaction lock must be dropped before here - * as kbase_wait_write_flush could take it if - * the GPU was powered down (static analysis doesn't - * know this can't happen). 
- */ - drain_pending |= (!err) && sync && - kbase_hw_has_issue(kctx->kbdev, - BASE_HW_ISSUE_6367); - if (drain_pending) { - /* Wait for GPU to flush write buffer */ - kbase_wait_write_flush(kctx); + kbase_pm_context_idle(kbdev); } -#endif /* !CONFIG_MALI_NO_MALI */ - - kbase_pm_context_idle(kbdev); } kbasep_js_runpool_release_ctx(kbdev, kctx); } } -void kbase_mmu_update(struct kbase_context *kctx) -{ - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - /* ASSERT that the context has a valid as_nr, which is only the case - * when it's scheduled in. - * - * as_nr won't change because the caller has the runpool_irq lock */ - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex); - - kctx->kbdev->mmu_mode->update(kctx); -} -KBASE_EXPORT_TEST_API(kbase_mmu_update); - -void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) -{ - lockdep_assert_held(&kbdev->as[as_nr].transaction_mutex); - lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); - - kbdev->mmu_mode->disable_as(kbdev, as_nr); -} - -void kbase_mmu_disable(struct kbase_context *kctx) -{ - /* ASSERT that the context has a valid as_nr, which is only the case - * when it's scheduled in. - * - * as_nr won't change because the caller has the runpool_irq lock */ - KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); - - lockdep_assert_held(&kctx->kbdev->as[kctx->as_nr].transaction_mutex); - lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); - - /* - * The address space is being disabled, drain all knowledge of it out - * from the caches as pages and page tables might be freed after this. - * - * The job scheduler code will already be holding the locks and context - * so just do the flush. - */ - kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0, true); - - kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); -} -KBASE_EXPORT_TEST_API(kbase_mmu_disable); - /* * We actually only discard the ATE, and not the page table * pages. 
There is a potential DoS here, as we'll leak memory by @@ -984,18 +782,17 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) struct kbase_device *kbdev; size_t requested_nr = nr; struct kbase_mmu_mode const *mmu_mode; - int err; KBASE_DEBUG_ASSERT(NULL != kctx); beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr); + lockdep_assert_held(&kctx->reg_lock); + if (0 == nr) { /* early out if nothing to do */ return 0; } - mutex_lock(&kctx->mmu_lock); - kbdev = kctx->kbdev; mmu_mode = kbdev->mmu_mode; @@ -1011,16 +808,14 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) pgd = mmu_get_bottom_pgd(kctx, vpfn); if (!pgd) { dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n"); - err = -EINVAL; - goto fail_unlock; + return -EINVAL; } p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n"); - err = -ENOMEM; - goto fail_unlock; + return -ENOMEM; } for (i = 0; i < count; i++) @@ -1036,14 +831,8 @@ int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) kunmap(p); } - mutex_unlock(&kctx->mmu_lock); - kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); + kbase_mmu_flush(kctx, vpfn, requested_nr); return 0; - -fail_unlock: - mutex_unlock(&kctx->mmu_lock); - kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); - return err; } KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); @@ -1066,17 +855,12 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph u64 *pgd_page; size_t requested_nr = nr; struct kbase_mmu_mode const *mmu_mode; - int err; KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(0 != vpfn); KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); - /* Early out if there is nothing to do */ - if (nr == 0) - return 0; - - mutex_lock(&kctx->mmu_lock); + lockdep_assert_held(&kctx->reg_lock); mmu_mode = kctx->kbdev->mmu_mode; @@ -1095,16 +879,14 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph pgd = mmu_get_bottom_pgd(kctx, vpfn); if (!pgd) { dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n"); - err = -EINVAL; - goto fail_unlock; + return -EINVAL; } p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { dev_warn(kctx->kbdev->dev, "kmap failure\n"); - err = -ENOMEM; - goto fail_unlock; + return -ENOMEM; } for (i = 0; i < count; i++) @@ -1122,14 +904,9 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *ph kunmap(pfn_to_page(PFN_DOWN(pgd))); } - mutex_unlock(&kctx->mmu_lock); - kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); - return 0; + kbase_mmu_flush(kctx, vpfn, requested_nr); -fail_unlock: - mutex_unlock(&kctx->mmu_lock); - kbase_mmu_flush_invalidate(kctx, vpfn, requested_nr, true); - return err; + return 0; } /* This is a debug feature only */ @@ -1157,7 +934,7 @@ static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int struct kbase_mmu_mode const *mmu_mode; KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->mmu_lock); + lockdep_assert_held(&kctx->reg_lock); pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); /* kmap_atomic should NEVER fail. 
*/ @@ -1202,8 +979,6 @@ int kbase_mmu_init(struct kbase_context *kctx) KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages); - mutex_init(&kctx->mmu_lock); - /* Preallocate MMU depth of four pages for mmu_teardown_level to use */ kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); @@ -1229,9 +1004,9 @@ void kbase_mmu_free_pgd(struct kbase_context *kctx) KBASE_DEBUG_ASSERT(NULL != kctx); KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages); - mutex_lock(&kctx->mmu_lock); + lockdep_assert_held(&kctx->reg_lock); + mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages); - mutex_unlock(&kctx->mmu_lock); beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd); kbase_mem_pool_free(&kctx->mem_pool, phys_to_page(kctx->pgd), true); @@ -1239,9 +1014,11 @@ void kbase_mmu_free_pgd(struct kbase_context *kctx) new_page_count = kbase_atomic_sub_pages(1, &kctx->used_pages); kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); +#if defined(CONFIG_MALI_MIPE_ENABLED) kbase_tlstream_aux_pagesalloc( (u32)kctx->id, (u64)new_page_count); +#endif } KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd); @@ -1256,7 +1033,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, struct kbase_mmu_mode const *mmu_mode; KBASE_DEBUG_ASSERT(NULL != kctx); - lockdep_assert_held(&kctx->mmu_lock); + lockdep_assert_held(&kctx->reg_lock); mmu_mode = kctx->kbdev->mmu_mode; @@ -1311,13 +1088,13 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) KBASE_DEBUG_ASSERT(kctx); + lockdep_assert_held(&kctx->reg_lock); + if (0 == nr_pages) { /* can't dump in a 0 sized buffer, early out */ return NULL; } - mutex_lock(&kctx->mmu_lock); - size_left = nr_pages * PAGE_SIZE; KBASE_DEBUG_ASSERT(0 != size_left); @@ -1339,7 +1116,7 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) kctx->kbdev->mmu_mode->get_as_setup(kctx, &as_setup); config[0] = as_setup.transtab; config[1] = as_setup.memattr; - config[2] = as_setup.transcfg; + config[2] = 0; memcpy(buffer, &config, sizeof(config)); mmu_dump_buffer += sizeof(config); size_left -= sizeof(config); @@ -1353,8 +1130,10 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) &mmu_dump_buffer, &size_left); - if (!size) - goto fail_free; + if (!size) { + vfree(kaddr); + return NULL; + } /* Add on the size for the end marker */ size += sizeof(u64); @@ -1365,20 +1144,15 @@ void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) { /* The buffer isn't big enough - free the memory and return failure */ - goto fail_free; + vfree(kaddr); + return NULL; } /* Add the end marker */ memcpy(mmu_dump_buffer, &end_marker, sizeof(u64)); } - mutex_unlock(&kctx->mmu_lock); return kaddr; - -fail_free: - vfree(kaddr); - mutex_unlock(&kctx->mmu_lock); - return NULL; } KBASE_EXPORT_TEST_API(kbase_mmu_dump); @@ -1419,17 +1193,13 @@ void bus_fault_worker(struct work_struct *data) #endif /* KBASE_GPU_RESET_EN */ /* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */ if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { - unsigned long flags; /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ /* AS transaction begin */ mutex_lock(&kbdev->as[as_no].transaction_mutex); /* Set the MMU into unmapped mode */ - spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); - kbase_mmu_disable(kctx); - 
spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, - flags); + kbase_mmu_disable_as(kbdev, as_no); mutex_unlock(&kbdev->as[as_no].transaction_mutex); /* AS transaction end */ @@ -1547,15 +1317,6 @@ const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code) e = "TRANSLATION_FAULT"; break; case 0xC8: -#ifdef CONFIG_MALI_GPU_MMU_AARCH64 - case 0xC9: - case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: -#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ e = "PERMISSION_FAULT"; break; case 0xD0: @@ -1569,38 +1330,8 @@ const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code) e = "TRANSTAB_BUS_FAULT"; break; case 0xD8: -#ifdef CONFIG_MALI_GPU_MMU_AARCH64 - case 0xD9: - case 0xDA: - case 0xDB: - case 0xDC: - case 0xDD: - case 0xDE: - case 0xDF: -#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ e = "ACCESS_FLAG"; break; -#ifdef CONFIG_MALI_GPU_MMU_AARCH64 - case 0xE0: - case 0xE1: - case 0xE2: - case 0xE3: - case 0xE4: - case 0xE5: - case 0xE6: - case 0xE7: - e = "ADDRESS_SIZE_FAULT"; - break; - case 0xE8: - case 0xE9: - case 0xEA: - case 0xEB: - case 0xEC: - case 0xED: - case 0xEE: - case 0xEF: - e = "MEMORY_ATTRIBUTES_FAULT"; -#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ break; default: e = "UNKNOWN"; @@ -1614,12 +1345,7 @@ static const char *access_type_name(struct kbase_device *kbdev, u32 fault_status) { switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { - case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: -#ifdef CONFIG_MALI_GPU_MMU_AARCH64 - return "ATOMIC"; -#else return "UNKNOWN"; -#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ case AS_FAULTSTATUS_ACCESS_TYPE_READ: return "READ"; case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: @@ -1715,9 +1441,7 @@ static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, } #endif /* KBASE_GPU_RESET_EN */ /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); - kbase_mmu_disable(kctx); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + kbase_mmu_disable_as(kbdev, as_no); mutex_unlock(&as->transaction_mutex); /* AS transaction end */ @@ -1954,15 +1678,8 @@ void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_contex */ kbasep_js_clear_submit_allowed(js_devdata, kctx); -#ifdef CONFIG_MALI_GPU_MMU_AARCH64 - dev_warn(kbdev->dev, - "Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n", - as->number, as->fault_addr, - as->fault_extra_addr); -#else dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n", as->number, as->fault_addr); -#endif /* CONFIG_MALI_GPU_MMU_AARCH64 */ /* * We need to switch to UNMAPPED mode - but we do this in a diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c index 683cabb797db6..079ef81d06d13 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -77,25 +77,17 @@ static void mmu_get_as_setup(struct kbase_context *kctx, (AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY << (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | (AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL << - (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | + (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | (AS_MEMATTR_LPAE_WRITE_ALLOC << - (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | - (AS_MEMATTR_LPAE_OUTER_IMPL_DEF << - (AS_MEMATTR_INDEX_OUTER_IMPL_DEF * 8)) | - (AS_MEMATTR_LPAE_OUTER_WA << - (AS_MEMATTR_INDEX_OUTER_WA * 8)) | + (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | 0; /* The other indices are unused for now */ - setup->transtab = ((u64)kctx->pgd & - ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK)) | - AS_TRANSTAB_LPAE_ADRMODE_TABLE | - AS_TRANSTAB_LPAE_READ_INNER; + setup->transtab = (u64)kctx->pgd & + ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK); + + setup->transtab |= AS_TRANSTAB_LPAE_ADRMODE_TABLE; + setup->transtab |= AS_TRANSTAB_LPAE_READ_INNER; -#ifdef CONFIG_MALI_GPU_MMU_AARCH64 - setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY; -#else - setup->transcfg = 0; -#endif } static void mmu_update(struct kbase_context *kctx) @@ -117,9 +109,6 @@ static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED; -#ifdef CONFIG_MALI_GPU_MMU_AARCH64 - current_setup->transcfg = AS_TRANSCFG_ADRMODE_LEGACY; -#endif /* Apply the address space setting */ kbase_mmu_hw_configure(kbdev, as, NULL); diff --git a/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c index 1a44957fe44a5..5bbd6d4856382 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c +++ b/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2014, 2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,6 +23,9 @@ #include #include +#ifdef CONFIG_MACH_MANTA +#include +#endif /* * This file is included only for type definitions and functions belonging to @@ -59,7 +62,6 @@ static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io linux_resources[0].start = io_resources->io_memory_region.start; linux_resources[0].end = io_resources->io_memory_region.end; linux_resources[0].flags = IORESOURCE_MEM; - linux_resources[1].start = io_resources->job_irq_number; linux_resources[1].end = io_resources->job_irq_number; linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.c b/drivers/gpu/arm/midgard/mali_kbase_pm.c index 97d543464c28b..261441fa145b2 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_pm.c +++ b/drivers/gpu/arm/midgard/mali_kbase_pm.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,10 +21,10 @@ * @file mali_kbase_pm.c * Base kernel power management APIs */ - #include #include -#include +#include +#include #include @@ -151,10 +151,6 @@ void kbase_pm_suspend(struct kbase_device *kbdev) { KBASE_DEBUG_ASSERT(kbdev); - /* Suspend vinstr. - * This call will block until vinstr is suspended. */ - kbase_vinstr_suspend(kbdev->vinstr_ctx); - mutex_lock(&kbdev->pm.lock); KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); kbdev->pm.suspending = true; @@ -168,6 +164,9 @@ void kbase_pm_suspend(struct kbase_device *kbdev) * the PM active count references */ kbasep_js_suspend(kbdev); + /* Suspend any counter collection that might be happening */ + kbase_instr_hwcnt_suspend(kbdev); + /* Wait for the active count to reach zero. This is not the same as * waiting for a power down, since not all policies power down when this * reaches zero. */ @@ -187,6 +186,9 @@ void kbase_pm_resume(struct kbase_device *kbdev) /* Initial active call, to power on the GPU/cores if needed */ kbase_pm_context_active(kbdev); + /* Re-enable instrumentation, if it was previously disabled */ + kbase_instr_hwcnt_resume(kbdev); + /* Resume any blocked atoms (which may cause contexts to be scheduled in * and dependent atoms to run) */ kbase_resume_suspended_soft_jobs(kbdev); @@ -198,8 +200,5 @@ void kbase_pm_resume(struct kbase_device *kbdev) /* Matching idle call, to power off the GPU/cores if we didn't actually * need it and the policy doesn't want it on */ kbase_pm_context_idle(kbdev); - - /* Resume vinstr operation */ - kbase_vinstr_resume(kbdev->vinstr_ctx); } diff --git a/drivers/gpu/arm/midgard/mali_kbase_replay.c b/drivers/gpu/arm/midgard/mali_kbase_replay.c index d3a3dbfa52419..71f005e325215 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_replay.c +++ b/drivers/gpu/arm/midgard/mali_kbase_replay.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,11 +27,15 @@ #include #define JOB_NOT_STARTED 0 -#define JOB_TYPE_NULL (1) -#define JOB_TYPE_VERTEX (5) -#define JOB_TYPE_TILER (7) -#define JOB_TYPE_FUSED (8) -#define JOB_TYPE_FRAGMENT (9) +#define JOB_TYPE_MASK 0xfe +#define JOB_TYPE_NULL (1 << 1) +#define JOB_TYPE_VERTEX (5 << 1) +#define JOB_TYPE_TILER (7 << 1) +#define JOB_TYPE_FUSED (8 << 1) +#define JOB_TYPE_FRAGMENT (9 << 1) + +#define JOB_FLAG_DESC_SIZE (1 << 0) +#define JOB_FLAG_PERFORM_JOB_BARRIER (1 << 8) #define JOB_HEADER_32_FBD_OFFSET (31*4) #define JOB_HEADER_64_FBD_OFFSET (44*4) @@ -54,9 +58,17 @@ #define JOB_SOURCE_ID(status) (((status) >> 16) & 0xFFFF) #define JOB_POLYGON_LIST (0x03) -struct fragment_job { - struct job_descriptor_header header; - +struct job_head { + u32 status; + u32 not_complete_index; + u64 fault_addr; + u16 flags; + u16 index; + u16 dependencies[2]; + union { + u64 _64; + u32 _32; + } next; u32 x[2]; union { u64 _64; @@ -65,43 +77,28 @@ struct fragment_job { }; static void dump_job_head(struct kbase_context *kctx, char *head_str, - struct job_descriptor_header *job) + struct job_head *job) { #ifdef CONFIG_MALI_DEBUG dev_dbg(kctx->kbdev->dev, "%s\n", head_str); - dev_dbg(kctx->kbdev->dev, - "addr = %p\n" - "exception_status = %x (Source ID: 0x%x Access: 0x%x Exception: 0x%x)\n" - "first_incomplete_task = %x\n" - "fault_pointer = %llx\n" - "job_descriptor_size = %x\n" - "job_type = %x\n" - "job_barrier = %x\n" - "_reserved_01 = %x\n" - "_reserved_02 = %x\n" - "_reserved_03 = %x\n" - "_reserved_04/05 = %x,%x\n" - "job_index = %x\n" - "dependencies = %x,%x\n", - job, job->exception_status, - JOB_SOURCE_ID(job->exception_status), - (job->exception_status >> 8) & 0x3, - job->exception_status & 0xFF, - job->first_incomplete_task, - job->fault_pointer, job->job_descriptor_size, - job->job_type, job->job_barrier, job->_reserved_01, - job->_reserved_02, job->_reserved_03, - job->_reserved_04, job->_reserved_05, - job->job_index, - job->job_dependency_index_1, - job->job_dependency_index_2); - - if (job->job_descriptor_size) + dev_dbg(kctx->kbdev->dev, "addr = %p\n" + "status = %x\n" + "not_complete_index = %x\n" + "fault_addr = %llx\n" + "flags = %x\n" + "index = %x\n" + "dependencies = %x,%x\n", + job, job->status, job->not_complete_index, + job->fault_addr, job->flags, job->index, + job->dependencies[0], + job->dependencies[1]); + + if (job->flags & JOB_FLAG_DESC_SIZE) dev_dbg(kctx->kbdev->dev, "next = %llx\n", - job->next_job._64); + job->next._64); else dev_dbg(kctx->kbdev->dev, "next = %x\n", - job->next_job._32); + job->next._32); #endif } @@ -375,81 +372,77 @@ static int kbasep_replay_reset_job(struct kbase_context *kctx, u32 default_weight, u16 hw_job_id_offset, bool first_in_chain, bool fragment_chain) { - struct fragment_job *frag_job; - struct job_descriptor_header *job; + struct job_head *job; u64 new_job_header; struct kbase_vmap_struct map; - frag_job = kbase_vmap(kctx, *job_header, sizeof(*frag_job), &map); - if (!frag_job) { + job = kbase_vmap(kctx, *job_header, sizeof(*job), &map); + if (!job) { dev_err(kctx->kbdev->dev, "kbasep_replay_parse_jc: failed to map jc\n"); return -EINVAL; } - job = &frag_job->header; dump_job_head(kctx, "Job header:", job); - if (job->exception_status == JOB_NOT_STARTED && !fragment_chain) { + if (job->status == JOB_NOT_STARTED && !fragment_chain) { dev_err(kctx->kbdev->dev, "Job already not started\n"); 
goto out_unmap; } - job->exception_status = JOB_NOT_STARTED; + job->status = JOB_NOT_STARTED; - if (job->job_type == JOB_TYPE_VERTEX) - job->job_type = JOB_TYPE_NULL; + if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_VERTEX) + job->flags = (job->flags & ~JOB_TYPE_MASK) | JOB_TYPE_NULL; - if (job->job_type == JOB_TYPE_FUSED) { + if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FUSED) { dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n"); goto out_unmap; } if (first_in_chain) - job->job_barrier = 1; + job->flags |= JOB_FLAG_PERFORM_JOB_BARRIER; - if ((job->job_dependency_index_1 + hw_job_id_offset) > - JOB_HEADER_ID_MAX || - (job->job_dependency_index_2 + hw_job_id_offset) > - JOB_HEADER_ID_MAX || - (job->job_index + hw_job_id_offset) > JOB_HEADER_ID_MAX) { + if ((job->dependencies[0] + hw_job_id_offset) > JOB_HEADER_ID_MAX || + (job->dependencies[1] + hw_job_id_offset) > JOB_HEADER_ID_MAX || + (job->index + hw_job_id_offset) > JOB_HEADER_ID_MAX) { dev_err(kctx->kbdev->dev, "Job indicies/dependencies out of valid range\n"); goto out_unmap; } - if (job->job_dependency_index_1) - job->job_dependency_index_1 += hw_job_id_offset; - if (job->job_dependency_index_2) - job->job_dependency_index_2 += hw_job_id_offset; + if (job->dependencies[0]) + job->dependencies[0] += hw_job_id_offset; + if (job->dependencies[1]) + job->dependencies[1] += hw_job_id_offset; - job->job_index += hw_job_id_offset; + job->index += hw_job_id_offset; - if (job->job_descriptor_size) { - new_job_header = job->next_job._64; - if (!job->next_job._64) - job->next_job._64 = prev_jc; + if (job->flags & JOB_FLAG_DESC_SIZE) { + new_job_header = job->next._64; + if (!job->next._64) + job->next._64 = prev_jc; } else { - new_job_header = job->next_job._32; - if (!job->next_job._32) - job->next_job._32 = prev_jc; + new_job_header = job->next._32; + if (!job->next._32) + job->next._32 = prev_jc; } dump_job_head(kctx, "Updated to:", job); - if (job->job_type == JOB_TYPE_TILER) { - bool job_64 = job->job_descriptor_size != 0; + if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_TILER) { + bool job_64 = (job->flags & JOB_FLAG_DESC_SIZE) != 0; if (kbasep_replay_reset_tiler_job(kctx, *job_header, tiler_heap_free, hierarchy_mask, default_weight, job_64) != 0) goto out_unmap; - } else if (job->job_type == JOB_TYPE_FRAGMENT) { + } else if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FRAGMENT) { u64 fbd_address; - if (job->job_descriptor_size) - fbd_address = frag_job->fragment_fbd._64; + if (job->flags & JOB_FLAG_DESC_SIZE) + fbd_address = job->fragment_fbd._64; else - fbd_address = (u64)frag_job->fragment_fbd._32; + fbd_address = (u64)job->fragment_fbd._32; if (fbd_address & FBD_TYPE) { if (kbasep_replay_reset_mfbd(kctx, @@ -492,7 +485,7 @@ static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx, u64 jc, u16 *hw_job_id) { while (jc) { - struct job_descriptor_header *job; + struct job_head *job; struct kbase_vmap_struct map; dev_dbg(kctx->kbdev->dev, @@ -505,13 +498,13 @@ static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx, return -EINVAL; } - if (job->job_index > *hw_job_id) - *hw_job_id = job->job_index; + if (job->index > *hw_job_id) + *hw_job_id = job->index; - if (job->job_descriptor_size) - jc = job->next_job._64; + if (job->flags & JOB_FLAG_DESC_SIZE) + jc = job->next._64; else - jc = job->next_job._32; + jc = job->next._32; kbase_vunmap(kctx, &map); } @@ -756,7 +749,7 @@ static int kbasep_replay_parse_payload(struct kbase_context *kctx, struct base_jd_atom_v2 *t_atom, struct base_jd_atom_v2 *f_atom) { - 
base_jd_replay_payload *payload = NULL; + base_jd_replay_payload *payload; u64 next; u64 prev_jc = 0; u16 hw_job_id_offset = 0; @@ -767,27 +760,12 @@ static int kbasep_replay_parse_payload(struct kbase_context *kctx, replay_atom->jc, sizeof(payload)); payload = kbase_vmap(kctx, replay_atom->jc, sizeof(*payload), &map); + if (!payload) { dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n"); return -EINVAL; } -#ifdef BASE_LEGACY_UK10_2_SUPPORT - if (KBASE_API_VERSION(10, 3) > replay_atom->kctx->api_version) { - base_jd_replay_payload_uk10_2 *payload_uk10_2; - u16 tiler_core_req; - u16 fragment_core_req; - - payload_uk10_2 = (base_jd_replay_payload_uk10_2 *) payload; - memcpy(&tiler_core_req, &payload_uk10_2->tiler_core_req, - sizeof(tiler_core_req)); - memcpy(&fragment_core_req, &payload_uk10_2->fragment_core_req, - sizeof(fragment_core_req)); - payload->tiler_core_req = (u32)(tiler_core_req & 0x7fff); - payload->fragment_core_req = (u32)(fragment_core_req & 0x7fff); - } -#endif /* BASE_LEGACY_UK10_2_SUPPORT */ - #ifdef CONFIG_MALI_DEBUG dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload); dev_dbg(kctx->kbdev->dev, "Payload structure:\n" @@ -809,35 +787,18 @@ static int kbasep_replay_parse_payload(struct kbase_context *kctx, payload->fragment_core_req); payload_dump(kctx, payload); #endif + t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER; f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER; /* Sanity check core requirements*/ - if ((t_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_T || - (f_atom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_FS || + if ((t_atom->core_req & BASEP_JD_REQ_ATOM_TYPE & + ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_T || + (f_atom->core_req & BASEP_JD_REQ_ATOM_TYPE & + ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_FS || t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES || f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { - - int t_atom_type = t_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP; - int f_atom_type = f_atom->core_req & BASE_JD_REQ_ATOM_TYPE & ~BASE_JD_REQ_COHERENT_GROUP & ~BASE_JD_REQ_FS_AFBC; - int t_has_ex_res = t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES; - int f_has_ex_res = f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES; - - if (t_atom_type != BASE_JD_REQ_T) { - dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom not a tiler job. Was: 0x%x\n Expected: 0x%x", - t_atom_type, BASE_JD_REQ_T); - } - if (f_atom_type != BASE_JD_REQ_FS) { - dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom not a fragment shader. 
Was 0x%x Expected: 0x%x\n", - f_atom_type, BASE_JD_REQ_FS); - } - if (t_has_ex_res) { - dev_err(kctx->kbdev->dev, "Invalid core requirement: Tiler atom has external resources.\n"); - } - if (f_has_ex_res) { - dev_err(kctx->kbdev->dev, "Invalid core requirement: Fragment shader atom has external resources.\n"); - } - + dev_err(kctx->kbdev->dev, "Invalid core requirements\n"); goto out; } @@ -996,7 +957,7 @@ static bool kbase_replay_fault_check(struct kbase_jd_atom *katom) base_jd_replay_payload *payload; u64 job_header; u64 job_loop_detect; - struct job_descriptor_header *job; + struct job_head *job; struct kbase_vmap_struct job_map; struct kbase_vmap_struct map; bool err = false; @@ -1051,22 +1012,41 @@ static bool kbase_replay_fault_check(struct kbase_jd_atom *katom) } - dump_job_head(kctx, "\njob_head structure:\n", job); +#ifdef CONFIG_MALI_DEBUG + dev_dbg(dev, "\njob_head structure:\n" + "Source ID:0x%x Access:0x%x Exception:0x%x\n" + "at job addr = %p\n" + "not_complete_index = 0x%x\n" + "fault_addr = 0x%llx\n" + "flags = 0x%x\n" + "index = 0x%x\n" + "dependencies = 0x%x,0x%x\n", + JOB_SOURCE_ID(job->status), + ((job->status >> 8) & 0x3), + (job->status & 0xFF), + job, + job->not_complete_index, + job->fault_addr, + job->flags, + job->index, + job->dependencies[0], + job->dependencies[1]); +#endif /* Replay only when the polygon list reader caused the * DATA_INVALID_FAULT */ if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) && - (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->exception_status))) { + (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->status))) { err = true; kbase_vunmap(kctx, &job_map); break; } /* Move on to next fragment job in the list */ - if (job->job_descriptor_size) - job_header = job->next_job._64; + if (job->flags & JOB_FLAG_DESC_SIZE) + job_header = job->next._64; else - job_header = job->next_job._32; + job_header = job->next._32; kbase_vunmap(kctx, &job_map); diff --git a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c index 07b862546f80c..108c49d9150aa 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c +++ b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,23 +19,14 @@ #include -#if defined(CONFIG_DMA_SHARED_BUFFER) -#include -#include -#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ #include #ifdef CONFIG_SYNC #include "sync.h" #include #include "mali_kbase_sync.h" #endif -#include #include -#include #include -#include -#include -#include /* Mask to check cache alignment of data structures */ #define KBASE_CACHE_ALIGNMENT_MASK ((1<kctx; - unsigned long lflags; - - spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); - list_add_tail(&katom->queue, &kctx->waiting_soft_jobs); - spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); -} - -void kbasep_remove_waiting_soft_job(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - unsigned long lflags; - - spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); - list_del(&katom->queue); - spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); -} - -static void kbasep_add_waiting_with_timeout(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - - /* Record the start time of this atom so we could cancel it at - * the right time. - */ - katom->start_timestamp = ktime_get(); - - /* Add the atom to the waiting list before the timer is - * (re)started to make sure that it gets processed. - */ - kbasep_add_waiting_soft_job(katom); - - /* Schedule timeout of this atom after a period if it is not active */ - if (!timer_pending(&kctx->soft_job_timeout)) { - int timeout_ms = atomic_read( - &kctx->kbdev->js_data.soft_job_timeout_ms); - mod_timer(&kctx->soft_job_timeout, - jiffies + msecs_to_jiffies(timeout_ms)); - } -} - -static int kbasep_read_soft_event_status( - struct kbase_context *kctx, u64 evt, unsigned char *status) -{ - unsigned char *mapped_evt; - struct kbase_vmap_struct map; - - mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map); - if (!mapped_evt) - return -EFAULT; - - *status = *mapped_evt; - - kbase_vunmap(kctx, &map); - - return 0; -} - -static int kbasep_write_soft_event_status( - struct kbase_context *kctx, u64 evt, unsigned char new_status) -{ - unsigned char *mapped_evt; - struct kbase_vmap_struct map; - - if ((new_status != BASE_JD_SOFT_EVENT_SET) && - (new_status != BASE_JD_SOFT_EVENT_RESET)) - return -EINVAL; - - mapped_evt = kbase_vmap(kctx, evt, sizeof(*mapped_evt), &map); - if (!mapped_evt) - return -EFAULT; - - *mapped_evt = new_status; - - kbase_vunmap(kctx, &map); - - return 0; -} - static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) { - struct kbase_vmap_struct map; - void *user_result; + struct kbase_va_region *reg; + phys_addr_t addr = 0; + u64 pfn; + u32 offset; + char *page; struct timespec ts; struct base_dump_cpu_gpu_counters data; u64 system_time; @@ -150,16 +63,11 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data; /* We're suspended - queue this on the list of suspended jobs - * Use dep_item[1], because dep_item[0] was previously in use - * for 'waiting_soft_jobs'. 
- */ + * Use dep_item[1], because dep_item[0] is in use for 'waiting_soft_jobs' */ mutex_lock(&js_devdata->runpool_mutex); list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list); mutex_unlock(&js_devdata->runpool_mutex); - /* Also adding this to the list of waiting soft job */ - kbasep_add_waiting_soft_job(katom); - return pm_active_err; } @@ -173,20 +81,44 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) data.system_time = system_time; data.cycle_counter = cycle_counter; + pfn = jc >> PAGE_SHIFT; + offset = jc & ~PAGE_MASK; + /* Assume this atom will be cancelled until we know otherwise */ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + if (offset > 0x1000 - sizeof(data)) { + /* Wouldn't fit in the page */ + return 0; + } + + kbase_gpu_vm_lock(kctx); + reg = kbase_region_tracker_find_region_enclosing_address(kctx, jc); + if (reg && + (reg->flags & KBASE_REG_GPU_WR) && + reg->cpu_alloc && reg->cpu_alloc->pages) + addr = reg->cpu_alloc->pages[pfn - reg->start_pfn]; + + kbase_gpu_vm_unlock(kctx); + if (!addr) + return 0; - /* GPU_WR access is checked on the range for returning the result to - * userspace for the following reasons: - * - security, this is currently how imported user bufs are checked. - * - userspace ddk guaranteed to assume region was mapped as GPU_WR */ - user_result = kbase_vmap_prot(kctx, jc, sizeof(data), KBASE_REG_GPU_WR, &map); - if (!user_result) + page = kmap(pfn_to_page(PFN_DOWN(addr))); + if (!page) return 0; - memcpy(user_result, &data, sizeof(data)); + kbase_sync_single_for_cpu(katom->kctx->kbdev, + kbase_dma_addr(pfn_to_page(PFN_DOWN(addr))) + + offset, sizeof(data), + DMA_BIDIRECTIONAL); - kbase_vunmap(kctx, &map); + memcpy(page + offset, &data, sizeof(data)); + + kbase_sync_single_for_device(katom->kctx->kbdev, + kbase_dma_addr(pfn_to_page(PFN_DOWN(addr))) + + offset, sizeof(data), + DMA_BIDIRECTIONAL); + + kunmap(pfn_to_page(PFN_DOWN(addr))); /* Atom was fine - mark it as done */ katom->event_code = BASE_JD_EVENT_DONE; @@ -196,6 +128,22 @@ static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) #ifdef CONFIG_SYNC +/* Complete an atom that has returned '1' from kbase_process_soft_job (i.e. has waited) + * + * @param katom The atom to complete + */ +static void complete_soft_job(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + + mutex_lock(&kctx->jctx.lock); + list_del(&katom->dep_item[0]); + kbase_finish_soft_job(katom); + if (jd_done_nolock(katom, NULL)) + kbase_js_sched_all(kctx->kbdev); + mutex_unlock(&kctx->jctx.lock); +} + static enum base_jd_event_code kbase_fence_trigger(struct kbase_jd_atom *katom, int result) { struct sync_pt *pt; @@ -237,12 +185,7 @@ static void kbase_fence_wait_worker(struct work_struct *data) katom = container_of(data, struct kbase_jd_atom, work); kctx = katom->kctx; - mutex_lock(&kctx->jctx.lock); - kbasep_remove_waiting_soft_job(katom); - kbase_finish_soft_job(katom); - if (jd_done_nolock(katom, NULL)) - kbase_js_sched_all(kctx->kbdev); - mutex_unlock(&kctx->jctx.lock); + complete_soft_job(katom); } static void kbase_fence_wait_callback(struct sync_fence *fence, struct sync_fence_waiter *waiter) @@ -259,7 +202,11 @@ static void kbase_fence_wait_callback(struct sync_fence *fence, struct sync_fenc /* Propagate the fence status to the atom. * If negative then cancel this atom and its dependencies. 
*/ - if (kbase_fence_get_status(fence) < 0) +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) + if (fence->status < 0) +#else + if (atomic_read(&fence->status) < 0) +#endif katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; /* To prevent a potential deadlock we schedule the work onto the job_done_wq workqueue @@ -287,25 +234,18 @@ static int kbase_fence_wait(struct kbase_jd_atom *katom) if (ret == 1) { /* Already signalled */ return 0; + } else if (ret < 0) { + goto cancel_atom; } + return 1; - if (ret < 0) { - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - /* We should cause the dependent jobs in the bag to be failed, - * to do this we schedule the work queue to complete this job */ - KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); - INIT_WORK(&katom->work, kbase_fence_wait_worker); - queue_work(katom->kctx->jctx.job_done_wq, &katom->work); - } - -#ifdef CONFIG_MALI_FENCE_DEBUG - /* The timeout code will add this job to the list of waiting soft jobs. - */ - kbasep_add_waiting_with_timeout(katom); -#else - kbasep_add_waiting_soft_job(katom); -#endif - + cancel_atom: + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + /* We should cause the dependant jobs in the bag to be failed, + * to do this we schedule the work queue to complete this job */ + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); + INIT_WORK(&katom->work, kbase_fence_wait_worker); + queue_work(katom->kctx->jctx.job_done_wq, &katom->work); return 1; } @@ -336,7 +276,6 @@ static void kbase_fence_cancel_wait(struct kbase_jd_atom *katom) finish_softjob: katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - kbasep_remove_waiting_soft_job(katom); kbase_finish_soft_job(katom); if (jd_done_nolock(katom, NULL)) @@ -344,959 +283,9 @@ static void kbase_fence_cancel_wait(struct kbase_jd_atom *katom) } #endif /* CONFIG_SYNC */ -static void kbasep_soft_event_complete_job(struct work_struct *work) -{ - struct kbase_jd_atom *katom = container_of(work, struct kbase_jd_atom, - work); - struct kbase_context *kctx = katom->kctx; - int resched; - - mutex_lock(&kctx->jctx.lock); - resched = jd_done_nolock(katom, NULL); - mutex_unlock(&kctx->jctx.lock); - - if (resched) - kbase_js_sched_all(kctx->kbdev); -} - -void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt) -{ - int cancel_timer = 1; - struct list_head *entry, *tmp; - unsigned long lflags; - - spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); - list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { - struct kbase_jd_atom *katom = list_entry( - entry, struct kbase_jd_atom, queue); - - switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { - case BASE_JD_REQ_SOFT_EVENT_WAIT: - if (katom->jc == evt) { - list_del(&katom->queue); - - katom->event_code = BASE_JD_EVENT_DONE; - INIT_WORK(&katom->work, - kbasep_soft_event_complete_job); - queue_work(kctx->jctx.job_done_wq, - &katom->work); - } else { - /* There are still other waiting jobs, we cannot - * cancel the timer yet. - */ - cancel_timer = 0; - } - break; -#ifdef CONFIG_MALI_FENCE_DEBUG - case BASE_JD_REQ_SOFT_FENCE_WAIT: - /* Keep the timer running if fence debug is enabled and - * there are waiting fence jobs. 
- */ - cancel_timer = 0; - break; -#endif - } - } - - if (cancel_timer) - del_timer(&kctx->soft_job_timeout); - spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); -} - -#ifdef CONFIG_MALI_FENCE_DEBUG -static char *kbase_fence_debug_status_string(int status) -{ - if (status == 0) - return "signaled"; - else if (status > 0) - return "active"; - else - return "error"; -} - -static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - struct device *dev = kctx->kbdev->dev; - int i; - - for (i = 0; i < 2; i++) { - struct kbase_jd_atom *dep; - - list_for_each_entry(dep, &katom->dep_head[i], dep_item[i]) { - if (dep->status == KBASE_JD_ATOM_STATE_UNUSED || - dep->status == KBASE_JD_ATOM_STATE_COMPLETED) - continue; - - if ((dep->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) - == BASE_JD_REQ_SOFT_FENCE_TRIGGER) { - struct sync_fence *fence = dep->fence; - int status = kbase_fence_get_status(fence); - - /* Found blocked trigger fence. */ - dev_warn(dev, - "\tVictim trigger atom %d fence [%p] %s: %s\n", - kbase_jd_atom_id(kctx, dep), - fence, fence->name, - kbase_fence_debug_status_string(status)); - } - - kbase_fence_debug_check_atom(dep); - } - } -} - -static void kbase_fence_debug_wait_timeout(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - struct device *dev = katom->kctx->kbdev->dev; - struct sync_fence *fence = katom->fence; - int timeout_ms = atomic_read(&kctx->kbdev->js_data.soft_job_timeout_ms); - int status = kbase_fence_get_status(fence); - unsigned long lflags; - - spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); - - dev_warn(dev, "ctx %d_%d: Atom %d still waiting for fence [%p] after %dms\n", - kctx->tgid, kctx->id, - kbase_jd_atom_id(kctx, katom), - fence, timeout_ms); - dev_warn(dev, "\tGuilty fence [%p] %s: %s\n", - fence, fence->name, - kbase_fence_debug_status_string(status)); - - /* Search for blocked trigger atoms */ - kbase_fence_debug_check_atom(katom); - - spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); - - /* Dump out the full state of all the Android sync fences. - * The function sync_dump() isn't exported to modules, so force - * sync_fence_wait() to time out to trigger sync_dump(). - */ - sync_fence_wait(fence, 1); -} - -struct kbase_fence_debug_work { - struct kbase_jd_atom *katom; - struct work_struct work; -}; - -static void kbase_fence_debug_wait_timeout_worker(struct work_struct *work) -{ - struct kbase_fence_debug_work *w = container_of(work, - struct kbase_fence_debug_work, work); - struct kbase_jd_atom *katom = w->katom; - struct kbase_context *kctx = katom->kctx; - - mutex_lock(&kctx->jctx.lock); - kbase_fence_debug_wait_timeout(katom); - mutex_unlock(&kctx->jctx.lock); - - kfree(w); -} - -static void kbase_fence_debug_timeout(struct kbase_jd_atom *katom) -{ - struct kbase_fence_debug_work *work; - struct kbase_context *kctx = katom->kctx; - - /* Enqueue fence debug worker. Use job_done_wq to get - * debug print ordered with job completion. - */ - work = kzalloc(sizeof(struct kbase_fence_debug_work), GFP_ATOMIC); - /* Ignore allocation failure. 
*/ - if (work) { - work->katom = katom; - INIT_WORK(&work->work, kbase_fence_debug_wait_timeout_worker); - queue_work(kctx->jctx.job_done_wq, &work->work); - } -} -#endif /* CONFIG_MALI_FENCE_DEBUG */ - -void kbasep_soft_job_timeout_worker(unsigned long data) -{ - struct kbase_context *kctx = (struct kbase_context *)data; - u32 timeout_ms = (u32)atomic_read( - &kctx->kbdev->js_data.soft_job_timeout_ms); - struct timer_list *timer = &kctx->soft_job_timeout; - ktime_t cur_time = ktime_get(); - bool restarting = false; - unsigned long lflags; - struct list_head *entry, *tmp; - - spin_lock_irqsave(&kctx->waiting_soft_jobs_lock, lflags); - list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { - struct kbase_jd_atom *katom = list_entry(entry, - struct kbase_jd_atom, queue); - s64 elapsed_time = ktime_to_ms(ktime_sub(cur_time, - katom->start_timestamp)); - - if (elapsed_time < (s64)timeout_ms) { - restarting = true; - continue; - } - - switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { - case BASE_JD_REQ_SOFT_EVENT_WAIT: - /* Take it out of the list to ensure that it - * will be cancelled in all cases - */ - list_del(&katom->queue); - - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - INIT_WORK(&katom->work, kbasep_soft_event_complete_job); - queue_work(kctx->jctx.job_done_wq, &katom->work); - break; -#ifdef CONFIG_MALI_FENCE_DEBUG - case BASE_JD_REQ_SOFT_FENCE_WAIT: - kbase_fence_debug_timeout(katom); - break; -#endif - } - } - - if (restarting) - mod_timer(timer, jiffies + msecs_to_jiffies(timeout_ms)); - spin_unlock_irqrestore(&kctx->waiting_soft_jobs_lock, lflags); -} - -static int kbasep_soft_event_wait(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - unsigned char status; - - /* The status of this soft-job is stored in jc */ - if (kbasep_read_soft_event_status(kctx, katom->jc, &status)) { - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - return 0; - } - - if (status == BASE_JD_SOFT_EVENT_SET) - return 0; /* Event already set, nothing to do */ - - kbasep_add_waiting_with_timeout(katom); - - return 1; -} - -static void kbasep_soft_event_update_locked(struct kbase_jd_atom *katom, - unsigned char new_status) -{ - /* Complete jobs waiting on the same event */ - struct kbase_context *kctx = katom->kctx; - - if (kbasep_write_soft_event_status(kctx, katom->jc, new_status) != 0) { - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - return; - } - - if (new_status == BASE_JD_SOFT_EVENT_SET) - kbasep_complete_triggered_soft_events(kctx, katom->jc); -} - -/** - * kbase_soft_event_update() - Update soft event state - * @kctx: Pointer to context - * @event: Event to update - * @new_status: New status value of event - * - * Update the event, and wake up any atoms waiting for the event. - * - * Return: 0 on success, a negative error code on failure. 
- */ -int kbase_soft_event_update(struct kbase_context *kctx, - u64 event, - unsigned char new_status) -{ - int err = 0; - - mutex_lock(&kctx->jctx.lock); - - if (kbasep_write_soft_event_status(kctx, event, new_status)) { - err = -ENOENT; - goto out; - } - - if (new_status == BASE_JD_SOFT_EVENT_SET) - kbasep_complete_triggered_soft_events(kctx, event); - -out: - mutex_unlock(&kctx->jctx.lock); - - return err; -} - -static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom) -{ - katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; - if (jd_done_nolock(katom, NULL)) - kbase_js_sched_all(katom->kctx->kbdev); -} - -struct kbase_debug_copy_buffer { - size_t size; - struct page **pages; - int nr_pages; - size_t offset; - /*To find memory region*/ - u64 gpu_addr; - - struct page **extres_pages; - int nr_extres_pages; -}; - -static inline void free_user_buffer(struct kbase_debug_copy_buffer *buffer) -{ - struct page **pages = buffer->extres_pages; - int nr_pages = buffer->nr_extres_pages; - - if (pages) { - int i; - - for (i = 0; i < nr_pages; i++) { - struct page *pg = pages[i]; - - if (pg) - put_page(pg); - } - kfree(pages); - } -} - -static void kbase_debug_copy_finish(struct kbase_jd_atom *katom) -{ - struct kbase_debug_copy_buffer *buffers = - (struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc; - unsigned int i; - unsigned int nr = katom->nr_extres; - - if (!buffers) - return; - - kbase_gpu_vm_lock(katom->kctx); - for (i = 0; i < nr; i++) { - int p; - struct kbase_va_region *reg; - - reg = kbase_region_tracker_find_region_enclosing_address( - katom->kctx, buffers[i].gpu_addr); - - if (!buffers[i].pages) - break; - for (p = 0; p < buffers[i].nr_pages; p++) { - struct page *pg = buffers[i].pages[p]; - - if (pg) - put_page(pg); - } - kfree(buffers[i].pages); - if (reg && reg->gpu_alloc) { - switch (reg->gpu_alloc->type) { - case KBASE_MEM_TYPE_IMPORTED_USER_BUF: - { - free_user_buffer(&buffers[i]); - break; - } - default: - /* Nothing to be done. 
*/ - break; - } - kbase_mem_phy_alloc_put(reg->gpu_alloc); - } - } - kbase_gpu_vm_unlock(katom->kctx); - kfree(buffers); - - katom->jc = 0; -} - -static int kbase_debug_copy_prepare(struct kbase_jd_atom *katom) -{ - struct kbase_debug_copy_buffer *buffers; - struct base_jd_debug_copy_buffer *user_buffers = NULL; - unsigned int i; - unsigned int nr = katom->nr_extres; - int ret = 0; - void __user *user_structs = (void __user *)(uintptr_t)katom->jc; - - if (!user_structs) - return -EINVAL; - - buffers = kcalloc(nr, sizeof(*buffers), GFP_KERNEL); - if (!buffers) { - ret = -ENOMEM; - katom->jc = 0; - goto out_cleanup; - } - katom->jc = (u64)(uintptr_t)buffers; - - user_buffers = kmalloc_array(nr, sizeof(*user_buffers), GFP_KERNEL); - - if (!user_buffers) { - ret = -ENOMEM; - goto out_cleanup; - } - - ret = copy_from_user(user_buffers, user_structs, - sizeof(*user_buffers)*nr); - if (ret) - goto out_cleanup; - - for (i = 0; i < nr; i++) { - u64 addr = user_buffers[i].address; - u64 page_addr = addr & PAGE_MASK; - u64 end_page_addr = addr + user_buffers[i].size - 1; - u64 last_page_addr = end_page_addr & PAGE_MASK; - int nr_pages = (last_page_addr-page_addr)/PAGE_SIZE+1; - int pinned_pages; - struct kbase_va_region *reg; - struct base_external_resource user_extres; - - if (!addr) - continue; - - buffers[i].nr_pages = nr_pages; - buffers[i].offset = addr & ~PAGE_MASK; - if (buffers[i].offset >= PAGE_SIZE) { - ret = -EINVAL; - goto out_cleanup; - } - buffers[i].size = user_buffers[i].size; - - buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *), - GFP_KERNEL); - if (!buffers[i].pages) { - ret = -ENOMEM; - goto out_cleanup; - } - - pinned_pages = get_user_pages_fast(page_addr, - nr_pages, - 1, /* Write */ - buffers[i].pages); - if (pinned_pages < 0) { - ret = pinned_pages; - goto out_cleanup; - } - if (pinned_pages != nr_pages) { - ret = -EINVAL; - goto out_cleanup; - } - - user_extres = user_buffers[i].extres; - if (user_extres.ext_resource == 0ULL) { - ret = -EINVAL; - goto out_cleanup; - } - - buffers[i].gpu_addr = user_extres.ext_resource & - ~BASE_EXT_RES_ACCESS_EXCLUSIVE; - kbase_gpu_vm_lock(katom->kctx); - reg = kbase_region_tracker_find_region_enclosing_address( - katom->kctx, buffers[i].gpu_addr); - - if (NULL == reg || NULL == reg->cpu_alloc || - (reg->flags & KBASE_REG_FREE)) { - ret = -EINVAL; - goto out_unlock; - } - kbase_mem_phy_alloc_get(reg->gpu_alloc); - - buffers[i].nr_extres_pages = reg->nr_pages; - if (reg->nr_pages*PAGE_SIZE != buffers[i].size) - dev_warn(katom->kctx->kbdev->dev, "Copy buffer is not of same size as the external resource to copy.\n"); - - switch (reg->gpu_alloc->type) { - case KBASE_MEM_TYPE_IMPORTED_USER_BUF: - { - struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc; - unsigned long nr_pages = - alloc->imported.user_buf.nr_pages; - - if (alloc->imported.user_buf.mm != current->mm) { - ret = -EINVAL; - goto out_unlock; - } - buffers[i].extres_pages = kcalloc(nr_pages, - sizeof(struct page *), GFP_KERNEL); - if (!buffers[i].extres_pages) { - ret = -ENOMEM; - goto out_unlock; - } - - ret = get_user_pages_fast( - alloc->imported.user_buf.address, - nr_pages, 0, - buffers[i].extres_pages); - if (ret != nr_pages) - goto out_unlock; - ret = 0; - break; - } - case KBASE_MEM_TYPE_IMPORTED_UMP: - { - dev_warn(katom->kctx->kbdev->dev, - "UMP is not supported for debug_copy jobs\n"); - ret = -EINVAL; - goto out_unlock; - } - default: - /* Nothing to be done. 
*/ - break; - } - kbase_gpu_vm_unlock(katom->kctx); - } - kfree(user_buffers); - - return ret; - -out_unlock: - kbase_gpu_vm_unlock(katom->kctx); - -out_cleanup: - kfree(buffers); - kfree(user_buffers); - - /* Frees allocated memory for kbase_debug_copy_job struct, including - * members, and sets jc to 0 */ - kbase_debug_copy_finish(katom); - return ret; -} - -static void kbase_mem_copy_from_extres_page(struct kbase_context *kctx, - void *extres_page, struct page **pages, unsigned int nr_pages, - unsigned int *target_page_nr, size_t offset, size_t *to_copy) -{ - void *target_page = kmap(pages[*target_page_nr]); - size_t chunk = PAGE_SIZE-offset; - - if (!target_page) { - *target_page_nr += 1; - dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job."); - return; - } - - chunk = min(chunk, *to_copy); - - memcpy(target_page + offset, extres_page, chunk); - *to_copy -= chunk; - - kunmap(pages[*target_page_nr]); - - *target_page_nr += 1; - if (*target_page_nr >= nr_pages) - return; - - target_page = kmap(pages[*target_page_nr]); - if (!target_page) { - *target_page_nr += 1; - dev_warn(kctx->kbdev->dev, "kmap failed in debug_copy job."); - return; - } - - KBASE_DEBUG_ASSERT(target_page); - - chunk = min(offset, *to_copy); - memcpy(target_page, extres_page + PAGE_SIZE-offset, chunk); - *to_copy -= chunk; - - kunmap(pages[*target_page_nr]); -} - -static int kbase_mem_copy_from_extres(struct kbase_context *kctx, - struct kbase_debug_copy_buffer *buf_data) -{ - unsigned int i; - unsigned int target_page_nr = 0; - struct kbase_va_region *reg; - struct page **pages = buf_data->pages; - u64 offset = buf_data->offset; - size_t extres_size = buf_data->nr_extres_pages*PAGE_SIZE; - size_t to_copy = min(extres_size, buf_data->size); - int ret = 0; - - KBASE_DEBUG_ASSERT(pages != NULL); - - kbase_gpu_vm_lock(kctx); - reg = kbase_region_tracker_find_region_enclosing_address( - kctx, buf_data->gpu_addr); - - if (!reg) { - ret = -EINVAL; - goto out_unlock; - } - - switch (reg->gpu_alloc->type) { - case KBASE_MEM_TYPE_IMPORTED_USER_BUF: - { - for (i = 0; i < buf_data->nr_extres_pages; i++) { - struct page *pg = buf_data->extres_pages[i]; - void *extres_page = kmap(pg); - - if (extres_page) - kbase_mem_copy_from_extres_page(kctx, - extres_page, pages, - buf_data->nr_pages, - &target_page_nr, - offset, &to_copy); - - kunmap(pg); - if (target_page_nr >= buf_data->nr_pages) - break; - } - break; - } - break; -#ifdef CONFIG_DMA_SHARED_BUFFER - case KBASE_MEM_TYPE_IMPORTED_UMM: { - struct dma_buf *dma_buf = reg->gpu_alloc->imported.umm.dma_buf; - - KBASE_DEBUG_ASSERT(dma_buf != NULL); - KBASE_DEBUG_ASSERT(dma_buf->size == - buf_data->nr_extres_pages * PAGE_SIZE); - - ret = dma_buf_begin_cpu_access(dma_buf, -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) - 0, buf_data->nr_extres_pages*PAGE_SIZE, -#endif - DMA_FROM_DEVICE); - if (ret) - goto out_unlock; - - for (i = 0; i < buf_data->nr_extres_pages; i++) { - - void *extres_page = dma_buf_kmap(dma_buf, i); - - if (extres_page) - kbase_mem_copy_from_extres_page(kctx, - extres_page, pages, - buf_data->nr_pages, - &target_page_nr, - offset, &to_copy); - - dma_buf_kunmap(dma_buf, i, extres_page); - if (target_page_nr >= buf_data->nr_pages) - break; - } - dma_buf_end_cpu_access(dma_buf, -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) - 0, buf_data->nr_extres_pages*PAGE_SIZE, -#endif - DMA_FROM_DEVICE); - break; - } -#endif - default: - ret = -EINVAL; - } -out_unlock: - kbase_gpu_vm_unlock(kctx); - return ret; - -} - -static int kbase_debug_copy(struct kbase_jd_atom 
*katom) -{ - struct kbase_debug_copy_buffer *buffers = - (struct kbase_debug_copy_buffer *)(uintptr_t)katom->jc; - unsigned int i; - - for (i = 0; i < katom->nr_extres; i++) { - int res = kbase_mem_copy_from_extres(katom->kctx, &buffers[i]); - - if (res) - return res; - } - - return 0; -} - -static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom) -{ - __user void *data = (__user void *)(uintptr_t) katom->jc; - struct base_jit_alloc_info *info; - int ret; - - /* Fail the job if there is no info structure */ - if (!data) { - ret = -EINVAL; - goto fail; - } - - /* Copy the information for safe access and future storage */ - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) { - ret = -ENOMEM; - goto fail; - } - - if (copy_from_user(info, data, sizeof(*info)) != 0) { - ret = -EINVAL; - goto free_info; - } - - /* If the ID is zero then fail the job */ - if (info->id == 0) { - ret = -EINVAL; - goto free_info; - } - - /* Sanity check that the PA fits within the VA */ - if (info->va_pages < info->commit_pages) { - ret = -EINVAL; - goto free_info; - } - - /* Ensure the GPU address is correctly aligned */ - if ((info->gpu_alloc_addr & 0x7) != 0) { - ret = -EINVAL; - goto free_info; - } - - /* Replace the user pointer with our kernel allocated info structure */ - katom->jc = (u64)(uintptr_t) info; - - /* - * Note: - * The provided info->gpu_alloc_addr isn't validated here as - * userland can cache allocations which means that even - * though the region is valid it doesn't represent the - * same thing it used to. - * - * Complete validation of va_pages, commit_pages and extent - * isn't done here as it will be done during the call to - * kbase_mem_alloc. - */ - return 0; - -free_info: - kfree(info); -fail: - katom->jc = 0; - return ret; -} - -static void kbase_jit_allocate_process(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - struct base_jit_alloc_info *info; - struct kbase_va_region *reg; - struct kbase_vmap_struct mapping; - u64 *ptr; - - info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc; - - /* The JIT ID is still in use so fail the allocation */ - if (kctx->jit_alloc[info->id]) { - katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; - return; - } - - /* - * Mark the allocation so we know it's in use even if the - * allocation itself fails. - */ - kctx->jit_alloc[info->id] = (struct kbase_va_region *) -1; - - /* Create a JIT allocation */ - reg = kbase_jit_allocate(kctx, info); - if (!reg) { - katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED; - return; - } - - /* - * Write the address of the JIT allocation to the user provided - * GPU allocation. - */ - ptr = kbase_vmap(kctx, info->gpu_alloc_addr, sizeof(*ptr), - &mapping); - if (!ptr) { - /* - * Leave the allocation "live" as the JIT free jit will be - * submitted anyway. - */ - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - return; - } - - *ptr = reg->start_pfn << PAGE_SHIFT; - kbase_vunmap(kctx, &mapping); - - katom->event_code = BASE_JD_EVENT_DONE; - - /* - * Bind it to the user provided ID. Do this last so we can check for - * the JIT free racing this JIT alloc job. 
- */ - kctx->jit_alloc[info->id] = reg; -} - -static void kbase_jit_allocate_finish(struct kbase_jd_atom *katom) -{ - struct base_jit_alloc_info *info; - - info = (struct base_jit_alloc_info *) (uintptr_t) katom->jc; - /* Free the info structure */ - kfree(info); -} - -static void kbase_jit_free_process(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - u8 id = (u8) katom->jc; - - /* - * If the ID is zero or it is not in use yet then fail the job. - */ - if ((id == 0) || (kctx->jit_alloc[id] == NULL)) { - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - return; - } - - /* - * If the ID is valid but the allocation request failed still succeed - * this soft job but don't try and free the allocation. - */ - if (kctx->jit_alloc[id] != (struct kbase_va_region *) -1) - kbase_jit_free(kctx, kctx->jit_alloc[id]); - - kctx->jit_alloc[id] = NULL; -} - -static int kbase_ext_res_prepare(struct kbase_jd_atom *katom) -{ - __user struct base_external_resource_list *user_ext_res; - struct base_external_resource_list *ext_res; - u64 count = 0; - size_t copy_size; - int ret; - - user_ext_res = (__user struct base_external_resource_list *) - (uintptr_t) katom->jc; - - /* Fail the job if there is no info structure */ - if (!user_ext_res) { - ret = -EINVAL; - goto fail; - } - - if (copy_from_user(&count, &user_ext_res->count, sizeof(u64)) != 0) { - ret = -EINVAL; - goto fail; - } - - /* Is the number of external resources in range? */ - if (!count || count > BASE_EXT_RES_COUNT_MAX) { - ret = -EINVAL; - goto fail; - } - - /* Copy the information for safe access and future storage */ - copy_size = sizeof(*ext_res); - copy_size += sizeof(struct base_external_resource) * (count - 1); - ext_res = kzalloc(copy_size, GFP_KERNEL); - if (!ext_res) { - ret = -ENOMEM; - goto fail; - } - - if (copy_from_user(ext_res, user_ext_res, copy_size) != 0) { - ret = -EINVAL; - goto free_info; - } - - /* - * Overwrite the count with the first value incase it was changed - * after the fact. - */ - ext_res->count = count; - - /* - * Replace the user pointer with our kernel allocated - * ext_res structure. - */ - katom->jc = (u64)(uintptr_t) ext_res; - - return 0; - -free_info: - kfree(ext_res); -fail: - return ret; -} - -static void kbase_ext_res_process(struct kbase_jd_atom *katom, bool map) -{ - struct base_external_resource_list *ext_res; - int i; - bool failed = false; - - ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc; - if (!ext_res) - goto failed_jc; - - kbase_gpu_vm_lock(katom->kctx); - - for (i = 0; i < ext_res->count; i++) { - u64 gpu_addr; - - gpu_addr = ext_res->ext_res[i].ext_resource & - ~BASE_EXT_RES_ACCESS_EXCLUSIVE; - if (map) { - if (!kbase_sticky_resource_acquire(katom->kctx, - gpu_addr)) - goto failed_loop; - } else - if (!kbase_sticky_resource_release(katom->kctx, NULL, - gpu_addr)) - failed = true; - } - - /* - * In the case of unmap we continue unmapping other resources in the - * case of failure but will always report failure if _any_ unmap - * request fails. 
- */ - if (failed) - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - else - katom->event_code = BASE_JD_EVENT_DONE; - - kbase_gpu_vm_unlock(katom->kctx); - - return; - -failed_loop: - while (--i > 0) { - u64 gpu_addr; - - gpu_addr = ext_res->ext_res[i].ext_resource & - ~BASE_EXT_RES_ACCESS_EXCLUSIVE; - - kbase_sticky_resource_release(katom->kctx, NULL, gpu_addr); - } - - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - kbase_gpu_vm_unlock(katom->kctx); - -failed_jc: - return; -} - -static void kbase_ext_res_finish(struct kbase_jd_atom *katom) -{ - struct base_external_resource_list *ext_res; - - ext_res = (struct base_external_resource_list *) (uintptr_t) katom->jc; - /* Free the info structure */ - kfree(ext_res); -} - int kbase_process_soft_job(struct kbase_jd_atom *katom) { - switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { + switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: return kbase_dump_cpu_gpu_time(katom); #ifdef CONFIG_SYNC @@ -1312,34 +301,6 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) #endif /* CONFIG_SYNC */ case BASE_JD_REQ_SOFT_REPLAY: return kbase_replay_process(katom); - case BASE_JD_REQ_SOFT_EVENT_WAIT: - return kbasep_soft_event_wait(katom); - case BASE_JD_REQ_SOFT_EVENT_SET: - kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_SET); - break; - case BASE_JD_REQ_SOFT_EVENT_RESET: - kbasep_soft_event_update_locked(katom, BASE_JD_SOFT_EVENT_RESET); - break; - case BASE_JD_REQ_SOFT_DEBUG_COPY: - { - int res = kbase_debug_copy(katom); - - if (res) - katom->event_code = BASE_JD_EVENT_JOB_INVALID; - break; - } - case BASE_JD_REQ_SOFT_JIT_ALLOC: - kbase_jit_allocate_process(katom); - break; - case BASE_JD_REQ_SOFT_JIT_FREE: - kbase_jit_free_process(katom); - break; - case BASE_JD_REQ_SOFT_EXT_RES_MAP: - kbase_ext_res_process(katom, true); - break; - case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: - kbase_ext_res_process(katom, false); - break; } /* Atom is complete */ @@ -1348,15 +309,12 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) void kbase_cancel_soft_job(struct kbase_jd_atom *katom) { - switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { + switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { #ifdef CONFIG_SYNC case BASE_JD_REQ_SOFT_FENCE_WAIT: kbase_fence_cancel_wait(katom); break; #endif - case BASE_JD_REQ_SOFT_EVENT_WAIT: - kbasep_soft_event_cancel_job(katom); - break; default: /* This soft-job doesn't support cancellation! 
*/ KBASE_DEBUG_ASSERT(0); @@ -1365,7 +323,7 @@ void kbase_cancel_soft_job(struct kbase_jd_atom *katom) int kbase_prepare_soft_job(struct kbase_jd_atom *katom) { - switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { + switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: { if (0 != (katom->jc & KBASE_CACHE_ALIGNMENT_MASK)) @@ -1414,23 +372,8 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) } break; #endif /* CONFIG_SYNC */ - case BASE_JD_REQ_SOFT_JIT_ALLOC: - return kbase_jit_allocate_prepare(katom); case BASE_JD_REQ_SOFT_REPLAY: - case BASE_JD_REQ_SOFT_JIT_FREE: break; - case BASE_JD_REQ_SOFT_EVENT_WAIT: - case BASE_JD_REQ_SOFT_EVENT_SET: - case BASE_JD_REQ_SOFT_EVENT_RESET: - if (katom->jc == 0) - return -EINVAL; - break; - case BASE_JD_REQ_SOFT_DEBUG_COPY: - return kbase_debug_copy_prepare(katom); - case BASE_JD_REQ_SOFT_EXT_RES_MAP: - return kbase_ext_res_prepare(katom); - case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: - return kbase_ext_res_prepare(katom); default: /* Unsupported soft-job */ return -EINVAL; @@ -1440,7 +383,7 @@ int kbase_prepare_soft_job(struct kbase_jd_atom *katom) void kbase_finish_soft_job(struct kbase_jd_atom *katom) { - switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) { + switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: /* Nothing to do */ break; @@ -1462,19 +405,6 @@ void kbase_finish_soft_job(struct kbase_jd_atom *katom) } break; #endif /* CONFIG_SYNC */ - - case BASE_JD_REQ_SOFT_DEBUG_COPY: - kbase_debug_copy_finish(katom); - break; - case BASE_JD_REQ_SOFT_JIT_ALLOC: - kbase_jit_allocate_finish(katom); - break; - case BASE_JD_REQ_SOFT_EXT_RES_MAP: - kbase_ext_res_finish(katom); - break; - case BASE_JD_REQ_SOFT_EXT_RES_UNMAP: - kbase_ext_res_finish(katom); - break; } } @@ -1509,15 +439,18 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) /* Remove from the global list */ list_del(&katom_iter->dep_item[1]); /* Remove from the context's list of waiting soft jobs */ - kbasep_remove_waiting_soft_job(katom_iter); + list_del(&katom_iter->dep_item[0]); if (kbase_process_soft_job(katom_iter) == 0) { kbase_finish_soft_job(katom_iter); resched |= jd_done_nolock(katom_iter, NULL); } else { + /* The job has not completed */ KBASE_DEBUG_ASSERT((katom_iter->core_req & - BASE_JD_REQ_SOFT_JOB_TYPE) + BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_SOFT_REPLAY); + list_add_tail(&katom_iter->dep_item[0], + &kctx->waiting_soft_jobs); } mutex_unlock(&kctx->jctx.lock); diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync.h b/drivers/gpu/arm/midgard/mali_kbase_sync.h index 820bddc8c8b16..6d8e34d3c3ae7 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_sync.h +++ b/drivers/gpu/arm/midgard/mali_kbase_sync.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,15 +36,6 @@ static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt) } #endif -static inline int kbase_fence_get_status(struct sync_fence *fence) -{ -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) - return fence->status; -#else - return atomic_read(&fence->status); -#endif -} - /* * Create a stream object. * Built on top of timeline object. 
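For readers following the mali_kbase_sync.h hunk above together with the kbase_fence_wait_callback hunk in mali_kbase_softjobs.c: the newer driver hides the kernel-version difference in how the Android sync framework stores a fence's status behind a small inline helper, while the r9p0 side open-codes the same #if at each call site. The snippet below is only an illustrative sketch of that helper pattern, mirroring the lines removed above; it assumes the staging sync framework header ("sync.h") is on the include path.

/* Sketch of the version-guarded status accessor removed from
 * mali_kbase_sync.h. Before Linux 3.17 sync_fence::status is a plain
 * int; from 3.17 onwards it is an atomic_t and must be read with
 * atomic_read().
 */
#include <linux/version.h>
#include "sync.h"

static inline int kbase_fence_get_status(struct sync_fence *fence)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
	return fence->status;
#else
	return atomic_read(&fence->status);
#endif
}

/* A call site can then stay version-agnostic, as in the newer driver:
 *
 *	if (kbase_fence_get_status(fence) < 0)
 *		katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
 */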
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_user.c b/drivers/gpu/arm/midgard/mali_kbase_sync_user.c index b9baa913a6932..ddd0847a69c5d 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_sync_user.c +++ b/drivers/gpu/arm/midgard/mali_kbase_sync_user.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -33,6 +33,7 @@ #include #include #include +#include static int kbase_stream_close(struct inode *inode, struct file *file) { diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c index ad88b7b690184..0994ddd7c95ff 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c +++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -41,6 +41,9 @@ /* The number of nanoseconds in a second. */ #define NSECS_IN_SEC 1000000000ull /* ns */ +/* The number of nanoseconds to wait before autoflushing the stream. */ +#define AUTOFLUSH_TIMEOUT (2ull * NSECS_IN_SEC) /* ns */ + /* The period of autoflush checker execution in milliseconds. */ #define AUTOFLUSH_INTERVAL 1000 /* ms */ @@ -139,8 +142,6 @@ enum tl_msg_id_obj { KBASE_TL_RET_ATOM_AS, KBASE_TL_NRET_ATOM_AS, KBASE_TL_DEP_ATOM_ATOM, - KBASE_TL_NDEP_ATOM_ATOM, - KBASE_TL_RDEP_ATOM_ATOM, KBASE_TL_ATTRIB_ATOM_CONFIG, KBASE_TL_ATTRIB_AS_CONFIG, @@ -151,9 +152,7 @@ enum tl_msg_id_obj { /* Message ids of trace events that are recorded in the auxiliary stream. */ enum tl_msg_id_aux { KBASE_AUX_PM_STATE, - KBASE_AUX_ISSUE_JOB_SOFTSTOP, KBASE_AUX_JOB_SOFTSTOP, - KBASE_AUX_JOB_SOFTSTOP_EX, KBASE_AUX_PAGEFAULT, KBASE_AUX_PAGESALLOC }; @@ -167,7 +166,7 @@ enum tl_msg_id_aux { * @wbi: write buffer index * @rbi: read buffer index * @numbered: if non-zero stream's packets are sequentially numbered - * @autoflush_counter: counter tracking stream's autoflush state + * @last_write_time: timestamp indicating last write * * This structure holds information needed to construct proper packets in the * timeline stream. Each message in sequence must bear timestamp that is greater @@ -178,11 +177,6 @@ enum tl_msg_id_aux { * Each packet in timeline body stream has sequence number embedded (this value * must increment monotonically and is used by packets receiver to discover * buffer overflows. - * Autoflush counter is set to negative number when there is no data pending - * for flush and it is set to zero on every update of the buffer. Autoflush - * timer will increment the counter by one on every expiry. In case there will - * be no activity on the buffer during two consecutive timer expiries, stream - * buffer will be flushed. */ struct tl_stream { spinlock_t lock; @@ -196,7 +190,7 @@ struct tl_stream { atomic_t rbi; int numbered; - atomic_t autoflush_counter; + u64 last_write_time; }; /** @@ -247,6 +241,9 @@ static atomic_t autoflush_timer_active; * streams at any given time. */ static DEFINE_MUTEX(tl_reader_lock); +/* Indicator of whether the timeline stream file descriptor is already used. */ +static atomic_t tlstream_busy = {0}; + /* Timeline stream event queue. 
*/ static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue); @@ -272,8 +269,8 @@ static const struct tp_desc tp_desc_obj[] = { KBASE_TL_NEW_CTX, __stringify(KBASE_TL_NEW_CTX), "object ctx is created", - "@pII", - "ctx,ctx_nr,tgid" + "@pI", + "ctx,ctx_nr" }, { KBASE_TL_NEW_GPU, @@ -408,20 +405,6 @@ static const struct tp_desc tp_desc_obj[] = { "@pp", "atom1,atom2" }, - { - KBASE_TL_NDEP_ATOM_ATOM, - __stringify(KBASE_TL_NDEP_ATOM_ATOM), - "atom2 no longer depends on atom1", - "@pp", - "atom1,atom2" - }, - { - KBASE_TL_RDEP_ATOM_ATOM, - __stringify(KBASE_TL_RDEP_ATOM_ATOM), - "resolved dependecy of atom2 depending on atom1", - "@pp", - "atom1,atom2" - }, { KBASE_TL_ATTRIB_ATOM_CONFIG, __stringify(KBASE_TL_ATTRIB_ATOM_CONFIG), @@ -454,13 +437,6 @@ static const struct tp_desc tp_desc_aux[] = { "@IL", "core_type,core_state_bitset" }, - { - KBASE_AUX_ISSUE_JOB_SOFTSTOP, - __stringify(KBASE_AUX_ISSUE_JOB_SOFTSTOP), - "Issuing job soft stop", - "@p", - "atom" - }, { KBASE_AUX_JOB_SOFTSTOP, __stringify(KBASE_AUX_JOB_SOFTSTOP), @@ -468,13 +444,6 @@ static const struct tp_desc tp_desc_aux[] = { "@I", "tag_id" }, - { - KBASE_AUX_JOB_SOFTSTOP_EX, - __stringify(KBASE_AUX_JOB_SOFTSTOP_EX), - "Job soft stop, more details", - "@pI", - "atom,job_type" - }, { KBASE_AUX_PAGEFAULT, __stringify(KBASE_AUX_PAGEFAULT), @@ -501,11 +470,6 @@ static atomic_t tlstream_bytes_generated = {0}; /*****************************************************************************/ -/* Indicator of whether the timeline stream file descriptor is used. */ -atomic_t kbase_tlstream_enabled = {0}; - -/*****************************************************************************/ - /** * kbasep_tlstream_get_timestamp - return timestamp * @@ -809,9 +773,6 @@ static size_t kbasep_tlstream_msgbuf_submit( unsigned int rb_idx_raw = atomic_read(&stream->rbi); unsigned int wb_idx = wb_idx_raw % PACKET_COUNT; - /* Set stream as flushed. */ - atomic_set(&stream->autoflush_counter, -1); - kbasep_tlstream_packet_header_update( stream->buffer[wb_idx].data, wb_size - PACKET_HEADER_SIZE); @@ -867,7 +828,7 @@ static size_t kbasep_tlstream_msgbuf_submit( static char *kbasep_tlstream_msgbuf_acquire( enum tl_stream_type stream_type, size_t msg_size, - unsigned long *flags) __acquires(&stream->lock) + unsigned long *flags) { struct tl_stream *stream; unsigned int wb_idx_raw; @@ -914,16 +875,14 @@ static char *kbasep_tlstream_msgbuf_acquire( */ static void kbasep_tlstream_msgbuf_release( enum tl_stream_type stream_type, - unsigned long flags) __releases(&stream->lock) + unsigned long flags) { struct tl_stream *stream; KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); stream = tl_stream[stream_type]; - - /* Mark stream as containing unflushed data. */ - atomic_set(&stream->autoflush_counter, 0); + stream->last_write_time = kbasep_tlstream_get_timestamp(); spin_unlock_irqrestore(&stream->lock, flags); } @@ -985,22 +944,6 @@ static void kbasep_tlstream_autoflush_timer_callback(unsigned long data) size_t wb_size; size_t min_size = PACKET_HEADER_SIZE; - int af_cnt = atomic_read(&stream->autoflush_counter); - - /* Check if stream contain unflushed data. */ - if (0 > af_cnt) - continue; - - /* Check if stream should be flushed now. */ - if (af_cnt != atomic_cmpxchg( - &stream->autoflush_counter, - af_cnt, - af_cnt + 1)) - continue; - if (!af_cnt) - continue; - - /* Autoflush this stream. 
*/ if (stream->numbered) min_size += PACKET_NUMBER_SIZE; @@ -1011,11 +954,16 @@ static void kbasep_tlstream_autoflush_timer_callback(unsigned long data) wb_size = atomic_read(&stream->buffer[wb_idx].size); if (wb_size > min_size) { - wb_size = kbasep_tlstream_msgbuf_submit( - stream, wb_idx_raw, wb_size); - wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; - atomic_set(&stream->buffer[wb_idx].size, - wb_size); + u64 timestamp = kbasep_tlstream_get_timestamp(); + + if (timestamp - stream->last_write_time + > AUTOFLUSH_TIMEOUT) { + wb_size = kbasep_tlstream_msgbuf_submit( + stream, wb_idx_raw, wb_size); + wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; + atomic_set(&stream->buffer[wb_idx].size, + wb_size); + } } spin_unlock_irqrestore(&stream->lock, flags); } @@ -1096,7 +1044,7 @@ static ssize_t kbasep_tlstream_read( while (copy_len < size) { enum tl_stream_type stype; - unsigned int rb_idx_raw = 0; + unsigned int rb_idx_raw; unsigned int rb_idx; size_t rb_size; @@ -1186,12 +1134,7 @@ static int kbasep_tlstream_release(struct inode *inode, struct file *filp) KBASE_DEBUG_ASSERT(filp); CSTD_UNUSED(inode); CSTD_UNUSED(filp); - - /* Stop autoflush timer before releasing access to streams. */ - atomic_set(&autoflush_timer_active, 0); - del_timer_sync(&autoflush_timer); - - atomic_set(&kbase_tlstream_enabled, 0); + atomic_set(&tlstream_busy, 0); return 0; } @@ -1280,6 +1223,7 @@ static void kbasep_tlstream_timeline_header( int kbase_tlstream_init(void) { enum tl_stream_type i; + int rcode; /* Prepare stream structures. */ for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) { @@ -1297,10 +1241,14 @@ int kbase_tlstream_init(void) } /* Initialize autoflush timer. */ - atomic_set(&autoflush_timer_active, 0); + atomic_set(&autoflush_timer_active, 1); setup_timer(&autoflush_timer, kbasep_tlstream_autoflush_timer_callback, 0); + rcode = mod_timer( + &autoflush_timer, + jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); + CSTD_UNUSED(rcode); return 0; } @@ -1309,6 +1257,9 @@ void kbase_tlstream_term(void) { enum tl_stream_type i; + atomic_set(&autoflush_timer_active, 0); + del_timer_sync(&autoflush_timer); + for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) { kbasep_timeline_stream_term(tl_stream[i]); kfree(tl_stream[i]); @@ -1317,16 +1268,14 @@ void kbase_tlstream_term(void) int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd) { - if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, 1)) { - int rcode; - + if (0 == atomic_cmpxchg(&tlstream_busy, 0, 1)) { *fd = anon_inode_getfd( "[mali_tlstream]", &kbasep_tlstream_fops, kctx, O_RDONLY | O_CLOEXEC); if (0 > *fd) { - atomic_set(&kbase_tlstream_enabled, 0); + atomic_set(&tlstream_busy, 0); return *fd; } @@ -1345,14 +1294,6 @@ int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd) TL_STREAM_TYPE_AUX_HEADER, tp_desc_aux, ARRAY_SIZE(tp_desc_aux)); - - /* Start autoflush timer. 
*/ - atomic_set(&autoflush_timer_active, 1); - rcode = mod_timer( - &autoflush_timer, - jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); - CSTD_UNUSED(rcode); - } else { *fd = -EBUSY; } @@ -1388,12 +1329,11 @@ void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated) /*****************************************************************************/ -void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid) +void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr) { const u32 msg_id = KBASE_TL_NEW_CTX; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) + - sizeof(tgid); + sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr); unsigned long flags; char *buffer; size_t pos = 0; @@ -1409,15 +1349,12 @@ void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid) buffer, pos, &context, sizeof(context)); pos = kbasep_tlstream_write_bytes( buffer, pos, &nr, sizeof(nr)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &tgid, sizeof(tgid)); - KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count) +void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count) { const u32 msg_id = KBASE_TL_NEW_GPU; const size_t msg_size = @@ -1445,7 +1382,7 @@ void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn) +void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn) { const u32 msg_id = KBASE_TL_NEW_LPU; const size_t msg_size = @@ -1473,7 +1410,7 @@ void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu) +void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu) { const u32 msg_id = KBASE_TL_LIFELINK_LPU_GPU; const size_t msg_size = @@ -1498,7 +1435,7 @@ void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr) +void kbase_tlstream_tl_summary_new_as(void *as, u32 nr) { const u32 msg_id = KBASE_TL_NEW_AS; const size_t msg_size = @@ -1523,7 +1460,7 @@ void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); } -void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu) +void kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu) { const u32 msg_id = KBASE_TL_LIFELINK_AS_GPU; const size_t msg_size = @@ -1550,12 +1487,11 @@ void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu) /*****************************************************************************/ -void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid) +void kbase_tlstream_tl_new_ctx(void *context, u32 nr) { const u32 msg_id = KBASE_TL_NEW_CTX; const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) + - sizeof(tgid); + sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr); unsigned long flags; char *buffer; size_t pos = 0; @@ -1571,14 +1507,12 @@ void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid) buffer, pos, &context, sizeof(context)); pos = kbasep_tlstream_write_bytes( buffer, pos, &nr, sizeof(nr)); - pos 
= kbasep_tlstream_write_bytes( - buffer, pos, &tgid, sizeof(tgid)); KBASE_DEBUG_ASSERT(msg_size == pos); kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_new_atom(void *atom, u32 nr) +void kbase_tlstream_tl_new_atom(void *atom, u32 nr) { const u32 msg_id = KBASE_TL_NEW_ATOM; const size_t msg_size = @@ -1603,7 +1537,7 @@ void __kbase_tlstream_tl_new_atom(void *atom, u32 nr) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_del_ctx(void *context) +void kbase_tlstream_tl_del_ctx(void *context) { const u32 msg_id = KBASE_TL_DEL_CTX; const size_t msg_size = @@ -1626,7 +1560,7 @@ void __kbase_tlstream_tl_del_ctx(void *context) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_del_atom(void *atom) +void kbase_tlstream_tl_del_atom(void *atom) { const u32 msg_id = KBASE_TL_DEL_ATOM; const size_t msg_size = @@ -1649,7 +1583,7 @@ void __kbase_tlstream_tl_del_atom(void *atom) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu) +void kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu) { const u32 msg_id = KBASE_TL_RET_CTX_LPU; const size_t msg_size = @@ -1674,7 +1608,7 @@ void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context) +void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context) { const u32 msg_id = KBASE_TL_RET_ATOM_CTX; const size_t msg_size = @@ -1699,7 +1633,7 @@ void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_ret_atom_lpu( +void kbase_tlstream_tl_ret_atom_lpu( void *atom, void *lpu, const char *attrib_match_list) { const u32 msg_id = KBASE_TL_RET_ATOM_LPU; @@ -1730,7 +1664,7 @@ void __kbase_tlstream_tl_ret_atom_lpu( kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu) +void kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu) { const u32 msg_id = KBASE_TL_NRET_CTX_LPU; const size_t msg_size = @@ -1755,7 +1689,7 @@ void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) +void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) { const u32 msg_id = KBASE_TL_NRET_ATOM_CTX; const size_t msg_size = @@ -1780,7 +1714,7 @@ void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2) +void kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2) { const u32 msg_id = KBASE_TL_DEP_ATOM_ATOM; const size_t msg_size = @@ -1805,57 +1739,7 @@ void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2) -{ - const u32 msg_id = KBASE_TL_NDEP_ATOM_ATOM; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = 
kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom1, sizeof(atom1)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom2, sizeof(atom2)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2) -{ - const u32 msg_id = KBASE_TL_RDEP_ATOM_ATOM; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_OBJ, - msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom1, sizeof(atom1)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &atom2, sizeof(atom2)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); -} - -void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) +void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) { const u32 msg_id = KBASE_TL_NRET_ATOM_LPU; const size_t msg_size = @@ -1880,7 +1764,7 @@ void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx) +void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx) { const u32 msg_id = KBASE_TL_RET_AS_CTX; const size_t msg_size = @@ -1905,7 +1789,7 @@ void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx) +void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx) { const u32 msg_id = KBASE_TL_NRET_AS_CTX; const size_t msg_size = @@ -1930,7 +1814,7 @@ void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as) +void kbase_tlstream_tl_ret_atom_as(void *atom, void *as) { const u32 msg_id = KBASE_TL_RET_ATOM_AS; const size_t msg_size = @@ -1955,7 +1839,7 @@ void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as) +void kbase_tlstream_tl_nret_atom_as(void *atom, void *as) { const u32 msg_id = KBASE_TL_NRET_ATOM_AS; const size_t msg_size = @@ -1980,7 +1864,7 @@ void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_attrib_atom_config( +void kbase_tlstream_tl_attrib_atom_config( void *atom, u64 jd, u64 affinity, u32 config) { const u32 msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG; @@ -2011,7 +1895,7 @@ void __kbase_tlstream_tl_attrib_atom_config( kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_tl_attrib_as_config( +void kbase_tlstream_tl_attrib_as_config( void *as, u64 transtab, u64 memattr, u64 transcfg) { const u32 msg_id = KBASE_TL_ATTRIB_AS_CONFIG; @@ -2042,7 +1926,7 @@ void __kbase_tlstream_tl_attrib_as_config( kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); } -void __kbase_tlstream_jd_gpu_soft_reset(void *gpu) +void kbase_tlstream_jd_gpu_soft_reset(void *gpu) { const u32 msg_id = KBASE_JD_GPU_SOFT_RESET; const size_t msg_size = @@ -2067,7 +1951,7 @@ void 
__kbase_tlstream_jd_gpu_soft_reset(void *gpu) /*****************************************************************************/ -void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state) +void kbase_tlstream_aux_pm_state(u32 core_type, u64 state) { const u32 msg_id = KBASE_AUX_PM_STATE; const size_t msg_size = @@ -2092,28 +1976,7 @@ void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } -void __kbase_tlstream_aux_issue_job_softstop(void *katom) -{ - const u32 msg_id = KBASE_AUX_ISSUE_JOB_SOFTSTOP; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(katom); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_AUX, msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); -} - -void __kbase_tlstream_aux_job_softstop(u32 js_id) +void kbase_tlstream_aux_job_softstop(u32 js_id) { const u32 msg_id = KBASE_AUX_JOB_SOFTSTOP; const size_t msg_size = @@ -2135,66 +1998,7 @@ void __kbase_tlstream_aux_job_softstop(u32 js_id) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } -/** - * __kbase_tlstream_aux_job_softstop_ex_record - record the trace point - * @katom: the atom that has been soft-stopped - * @job_type: the job type - */ -static void __kbase_tlstream_aux_job_softstop_ex_record( - void *katom, u32 job_type) -{ - const u32 msg_id = KBASE_AUX_JOB_SOFTSTOP_EX; - const size_t msg_size = - sizeof(msg_id) + sizeof(u64) + sizeof(katom) + sizeof(job_type); - unsigned long flags; - char *buffer; - size_t pos = 0; - - buffer = kbasep_tlstream_msgbuf_acquire( - TL_STREAM_TYPE_AUX, msg_size, &flags); - KBASE_DEBUG_ASSERT(buffer); - - pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); - pos = kbasep_tlstream_write_timestamp(buffer, pos); - pos = kbasep_tlstream_write_bytes(buffer, pos, &katom, sizeof(katom)); - pos = kbasep_tlstream_write_bytes( - buffer, pos, &job_type, sizeof(job_type)); - KBASE_DEBUG_ASSERT(msg_size == pos); - - kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); -} - -void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom) -{ - struct kbase_context *kctx = katom->kctx; - u64 jd = katom->jc; - - while (jd != 0) { - struct job_descriptor_header *job; - struct kbase_vmap_struct map; - - job = kbase_vmap(kctx, jd, sizeof(*job), &map); - if (!job) { - dev_err(kctx->kbdev->dev, - "__kbase_tlstream_aux_job_softstop_ex: failed to map job descriptor 0x%llx for atom 0x%p\n", - jd, (void *)katom); - break; - } - if (job->exception_status != BASE_JD_EVENT_STOPPED) { - kbase_vunmap(kctx, &map); - break; - } - - __kbase_tlstream_aux_job_softstop_ex_record( - katom, job->job_type); - - jd = job->job_descriptor_size ? 
- job->next_job._64 : job->next_job._32; - kbase_vunmap(kctx, &map); - } -} - -void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change) +void kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change) { const u32 msg_id = KBASE_AUX_PAGEFAULT; const size_t msg_size = @@ -2219,7 +2023,7 @@ void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change) kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); } -void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count) +void kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count) { const u32 msg_id = KBASE_AUX_PAGESALLOC; const size_t msg_size = diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h index 22a0d96f9a720..eccd469a86e8b 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h +++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -102,67 +102,18 @@ void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated); /*****************************************************************************/ -void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid); -void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count); -void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn); -void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu); -void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr); -void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu); -void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid); -void __kbase_tlstream_tl_new_atom(void *atom, u32 nr); -void __kbase_tlstream_tl_del_ctx(void *context); -void __kbase_tlstream_tl_del_atom(void *atom); -void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu); -void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context); -void __kbase_tlstream_tl_ret_atom_lpu( - void *atom, void *lpu, const char *attrib_match_list); -void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu); -void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context); -void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu); -void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx); -void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx); -void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as); -void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as); -void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2); -void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2); -void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2); -void __kbase_tlstream_tl_attrib_atom_config( - void *atom, u64 jd, u64 affinity, u32 config); -void __kbase_tlstream_tl_attrib_as_config( - void *as, u64 transtab, u64 memattr, u64 transcfg); -void __kbase_tlstream_jd_gpu_soft_reset(void *gpu); -void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state); -void __kbase_tlstream_aux_issue_job_softstop(void *katom); -void __kbase_tlstream_aux_job_softstop(u32 js_id); -void __kbase_tlstream_aux_job_softstop_ex(struct kbase_jd_atom *katom); -void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change); -void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count); - -extern atomic_t 
kbase_tlstream_enabled;
-
-#define __TRACE_IF_ENABLED(trace_name, ...) \
-	do { \
-		int enabled = atomic_read(&kbase_tlstream_enabled); \
-		if (enabled) \
-			__kbase_tlstream_##trace_name(__VA_ARGS__); \
-	} while (0)
-
-/*****************************************************************************/
-
 /**
  * kbase_tlstream_tl_summary_new_ctx - create context object in timeline
  * summary
  * @context: name of the context object
  * @nr: context number
- * @tgid: thread Group Id
  *
  * Function emits a timeline message informing about context creation. Context
  * is created with context number (its attribute), that can be used to link
  * kbase context with userspace context.
  * This message is directed to timeline summary stream.
  */
-#define kbase_tlstream_tl_summary_new_ctx(context, nr, tgid) \
-	__TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid)
+void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr);
 
 /**
  * kbase_tlstream_tl_summary_new_gpu - create GPU object in timeline summary
@@ -174,8 +125,7 @@ extern atomic_t kbase_tlstream_enabled;
  * created with two attributes: id and core count.
  * This message is directed to timeline summary stream.
  */
-#define kbase_tlstream_tl_summary_new_gpu(gpu, id, core_count) \
-	__TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count)
+void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
 
 /**
  * kbase_tlstream_tl_summary_new_lpu - create LPU object in timeline summary
@@ -188,8 +138,7 @@ extern atomic_t kbase_tlstream_enabled;
  * and function bearing information about this LPU abilities.
  * This message is directed to timeline summary stream.
  */
-#define kbase_tlstream_tl_summary_new_lpu(lpu, nr, fn) \
-	__TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn)
+void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
 
 /**
  * kbase_tlstream_tl_summary_lifelink_lpu_gpu - lifelink LPU object to GPU
@@ -200,8 +149,7 @@ extern atomic_t kbase_tlstream_enabled;
  * along with GPU object.
  * This message is directed to timeline summary stream.
  */
-#define kbase_tlstream_tl_summary_lifelink_lpu_gpu(lpu, gpu) \
-	__TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu)
+void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu);
 
 /**
  * kbase_tlstream_tl_summary_new_as - create address space object in timeline summary
@@ -213,8 +161,7 @@ extern atomic_t kbase_tlstream_enabled;
  * address space.
  * This message is directed to timeline summary stream.
  */
-#define kbase_tlstream_tl_summary_new_as(as, nr) \
-	__TRACE_IF_ENABLED(tl_summary_new_as, as, nr)
+void kbase_tlstream_tl_summary_new_as(void *as, u32 nr);
 
 /**
  * kbase_tlstream_tl_summary_lifelink_as_gpu - lifelink address space object to GPU
@@ -225,21 +172,18 @@ extern atomic_t kbase_tlstream_enabled;
  * shall be deleted along with GPU object.
  * This message is directed to timeline summary stream.
  */
-#define kbase_tlstream_tl_summary_lifelink_as_gpu(as, gpu) \
-	__TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu)
+void kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu);
 
 /**
  * kbase_tlstream_tl_new_ctx - create context object in timeline
  * @context: name of the context object
  * @nr: context number
- * @tgid: thread Group Id
  *
  * Function emits a timeline message informing about context creation. Context
  * is created with context number (its attribute), that can be used to link
  * kbase context with userspace context.
  */
-#define kbase_tlstream_tl_new_ctx(context, nr, tgid) \
-	__TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid)
+void kbase_tlstream_tl_new_ctx(void *context, u32 nr);
 
 /**
  * kbase_tlstream_tl_new_atom - create atom object in timeline
@@ -250,8 +194,7 @@ extern atomic_t kbase_tlstream_enabled;
  * created with atom number (its attribute) that links it with actual work
  * bucket id understood by hardware.
  */
-#define kbase_tlstream_tl_new_atom(atom, nr) \
-	__TRACE_IF_ENABLED(tl_new_atom, atom, nr)
+void kbase_tlstream_tl_new_atom(void *atom, u32 nr);
 
 /**
  * kbase_tlstream_tl_del_ctx - destroy context object in timeline
@@ -260,8 +203,7 @@ extern atomic_t kbase_tlstream_enabled;
  * Function emits a timeline message informing that context object ceased to
  * exist.
  */
-#define kbase_tlstream_tl_del_ctx(context) \
-	__TRACE_IF_ENABLED(tl_del_ctx, context)
+void kbase_tlstream_tl_del_ctx(void *context);
 
 /**
  * kbase_tlstream_tl_del_atom - destroy atom object in timeline
@@ -270,8 +212,7 @@ extern atomic_t kbase_tlstream_enabled;
  * Function emits a timeline message informing that atom object ceased to
  * exist.
  */
-#define kbase_tlstream_tl_del_atom(atom) \
-	__TRACE_IF_ENABLED(tl_del_atom, atom)
+void kbase_tlstream_tl_del_atom(void *atom);
 
 /**
  * kbase_tlstream_tl_ret_ctx_lpu - retain context by LPU
@@ -281,8 +222,7 @@ extern atomic_t kbase_tlstream_enabled;
  * Function emits a timeline message informing that context is being held
  * by LPU and must not be deleted unless it is released.
  */
-#define kbase_tlstream_tl_ret_ctx_lpu(context, lpu) \
-	__TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu)
+void kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu);
 
 /**
  * kbase_tlstream_tl_ret_atom_ctx - retain atom by context
@@ -292,8 +232,7 @@ extern atomic_t kbase_tlstream_enabled;
  * Function emits a timeline message informing that atom object is being held
  * by context and must not be deleted unless it is released.
  */
-#define kbase_tlstream_tl_ret_atom_ctx(atom, context) \
-	__TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context)
+void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context);
 
 /**
  * kbase_tlstream_tl_ret_atom_lpu - retain atom by LPU
@@ -304,8 +243,8 @@ extern atomic_t kbase_tlstream_enabled;
  * Function emits a timeline message informing that atom object is being held
  * by LPU and must not be deleted unless it is released.
  */
-#define kbase_tlstream_tl_ret_atom_lpu(atom, lpu, attrib_match_list) \
-	__TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list)
+void kbase_tlstream_tl_ret_atom_lpu(
+		void *atom, void *lpu, const char *attrib_match_list);
 
 /**
  * kbase_tlstream_tl_nret_ctx_lpu - release context by LPU
@@ -315,8 +254,7 @@ extern atomic_t kbase_tlstream_enabled;
  * Function emits a timeline message informing that context is being released
  * by LPU object.
  */
-#define kbase_tlstream_tl_nret_ctx_lpu(context, lpu) \
-	__TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu)
+void kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu);
 
 /**
  * kbase_tlstream_tl_nret_atom_ctx - release atom by context
@@ -326,8 +264,7 @@ extern atomic_t kbase_tlstream_enabled;
  * Function emits a timeline message informing that atom object is being
  * released by context.
*/ -#define kbase_tlstream_tl_nret_atom_ctx(atom, context) \ - __TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context) +void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context); /** * kbase_tlstream_tl_nret_atom_lpu - release atom by LPU @@ -337,8 +274,7 @@ extern atomic_t kbase_tlstream_enabled; * Function emits a timeline message informing that atom object is being * released by LPU. */ -#define kbase_tlstream_tl_nret_atom_lpu(atom, lpu) \ - __TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu) +void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu); /** * kbase_tlstream_tl_ret_as_ctx - lifelink address space object to context @@ -348,8 +284,7 @@ extern atomic_t kbase_tlstream_enabled; * Function emits a timeline message informing that address space object * is being held by the context object. */ -#define kbase_tlstream_tl_ret_as_ctx(as, ctx) \ - __TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx) +void kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx); /** * kbase_tlstream_tl_nret_as_ctx - release address space by context @@ -359,8 +294,7 @@ extern atomic_t kbase_tlstream_enabled; * Function emits a timeline message informing that address space object * is being released by atom. */ -#define kbase_tlstream_tl_nret_as_ctx(as, ctx) \ - __TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx) +void kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx); /** * kbase_tlstream_tl_ret_atom_as - retain atom by address space @@ -370,8 +304,7 @@ extern atomic_t kbase_tlstream_enabled; * Function emits a timeline message informing that atom object is being held * by address space and must not be deleted unless it is released. */ -#define kbase_tlstream_tl_ret_atom_as(atom, as) \ - __TRACE_IF_ENABLED(tl_ret_atom_as, atom, as) +void kbase_tlstream_tl_ret_atom_as(void *atom, void *as); /** * kbase_tlstream_tl_nret_atom_as - release atom by address space @@ -381,8 +314,7 @@ extern atomic_t kbase_tlstream_enabled; * Function emits a timeline message informing that atom object is being * released by address space. */ -#define kbase_tlstream_tl_nret_atom_as(atom, as) \ - __TRACE_IF_ENABLED(tl_nret_atom_as, atom, as) +void kbase_tlstream_tl_nret_atom_as(void *atom, void *as); /** * kbase_tlstream_tl_dep_atom_atom - parent atom depends on child atom @@ -392,30 +324,7 @@ extern atomic_t kbase_tlstream_enabled; * Function emits a timeline message informing that parent atom waits for * child atom object to be completed before start its execution. */ -#define kbase_tlstream_tl_dep_atom_atom(atom1, atom2) \ - __TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2) - -/** - * kbase_tlstream_tl_ndep_atom_atom - dependency between atoms resolved - * @atom1: name of the child atom object - * @atom2: name of the parent atom object that depended on child atom - * - * Function emits a timeline message informing that parent atom execution - * dependency on child atom has been resolved. - */ -#define kbase_tlstream_tl_ndep_atom_atom(atom1, atom2) \ - __TRACE_IF_ENABLED(tl_ndep_atom_atom, atom1, atom2) - -/** - * kbase_tlstream_tl_rdep_atom_atom - information about already resolved dependency between atoms - * @atom1: name of the child atom object - * @atom2: name of the parent atom object that depended on child atom - * - * Function emits a timeline message informing that parent atom execution - * dependency on child atom has been resolved. 
- */ -#define kbase_tlstream_tl_rdep_atom_atom(atom1, atom2) \ - __TRACE_IF_ENABLED(tl_rdep_atom_atom, atom1, atom2) +void kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2); /** * kbase_tlstream_tl_attrib_atom_config - atom job slot attributes @@ -426,8 +335,8 @@ extern atomic_t kbase_tlstream_enabled; * * Function emits a timeline message containing atom attributes. */ -#define kbase_tlstream_tl_attrib_atom_config(atom, jd, affinity, config) \ - __TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config) +void kbase_tlstream_tl_attrib_atom_config( + void *atom, u64 jd, u64 affinity, u32 config); /** * kbase_tlstream_tl_attrib_as_config - address space attributes @@ -438,8 +347,8 @@ extern atomic_t kbase_tlstream_enabled; * * Function emits a timeline message containing address space attributes. */ -#define kbase_tlstream_tl_attrib_as_config(as, transtab, memattr, transcfg) \ - __TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg) +void kbase_tlstream_tl_attrib_as_config( + void *as, u64 transtab, u64 memattr, u64 transcfg); /** * kbase_tlstream_jd_gpu_soft_reset - The GPU is being soft reset @@ -448,44 +357,20 @@ extern atomic_t kbase_tlstream_enabled; * This imperative tracepoint is specific to job dumping. * Function emits a timeline message indicating GPU soft reset. */ -#define kbase_tlstream_jd_gpu_soft_reset(gpu) \ - __TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu) +void kbase_tlstream_jd_gpu_soft_reset(void *gpu); /** * kbase_tlstream_aux_pm_state - timeline message: power management state * @core_type: core type (shader, tiler, l2 cache, l3 cache) * @state: 64bits bitmask reporting power state of the cores (1-ON, 0-OFF) */ -#define kbase_tlstream_aux_pm_state(core_type, state) \ - __TRACE_IF_ENABLED(aux_pm_state, core_type, state) - -/** - * kbase_tlstream_aux_issue_job_softstop - a soft-stop command is being issued - * @katom: the atom that is being soft-stopped - */ -#define kbase_tlstream_aux_issue_job_softstop(katom) \ - __TRACE_IF_ENABLED(aux_issue_job_softstop, katom) +void kbase_tlstream_aux_pm_state(u32 core_type, u64 state); /** * kbase_tlstream_aux_job_softstop - soft job stop occurred * @js_id: job slot id */ -#define kbase_tlstream_aux_job_softstop(js_id) \ - __TRACE_IF_ENABLED(aux_job_softstop, js_id) - -/** - * kbase_tlstream_aux_job_softstop_ex - extra info about soft-stopped atom - * @katom: the atom that has been soft-stopped - * - * This trace point adds more details about the soft-stopped atom. These details - * can't be safety collected inside the interrupt handler so we're doing it - * inside a worker. - * - * Note: this is not the same information that is recorded in the trace point, - * refer to __kbase_tlstream_aux_job_softstop_ex() for more details. 
- */ -#define kbase_tlstream_aux_job_softstop_ex(katom) \ - __TRACE_IF_ENABLED(aux_job_softstop_ex, katom) +void kbase_tlstream_aux_job_softstop(u32 js_id); /** * kbase_tlstream_aux_pagefault - timeline message: MMU page fault event @@ -493,8 +378,7 @@ extern atomic_t kbase_tlstream_enabled; * @ctx_nr: kernel context number * @page_count_change: number of pages to be added */ -#define kbase_tlstream_aux_pagefault(ctx_nr, page_count_change) \ - __TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change) +void kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change); /** * kbase_tlstream_aux_pagesalloc - timeline message: total number of allocated @@ -502,8 +386,7 @@ extern atomic_t kbase_tlstream_enabled; * @ctx_nr: kernel context number * @page_count: number of pages used by the context */ -#define kbase_tlstream_aux_pagesalloc(ctx_nr, page_count) \ - __TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count) +void kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count); #endif /* _KBASE_TLSTREAM_H */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c index a606ae810656d..aac9858875ad4 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c +++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -102,7 +102,7 @@ static const struct file_operations kbasep_trace_timeline_debugfs_fops = { .open = kbasep_trace_timeline_debugfs_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_private, }; void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev) diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h index 22a36494e72e2..d92caf054804f 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h +++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -213,6 +213,7 @@ void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev); js, _producerof_atom_number_completed); \ } while (0) + /** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a * certin caller */ #define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) \ @@ -318,6 +319,7 @@ void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev); #define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP() + static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom, int js) { diff --git a/drivers/gpu/arm/midgard/mali_kbase_uku.h b/drivers/gpu/arm/midgard/mali_kbase_uku.h index e880d9663d0eb..39514685b7527 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_uku.h +++ b/drivers/gpu/arm/midgard/mali_kbase_uku.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2008-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2008-2015 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -45,20 +45,9 @@ * 10.1: * - Do mmap in kernel for SAME_VA memory allocations rather then * calling back into the kernel as a 2nd stage of the allocation request. - * - * 10.2: - * - Add KBASE_FUNC_MEM_JIT_INIT which allows clients to request a custom VA - * region for use with JIT (ignored on 32-bit platforms) - * - * 10.3: - * - base_jd_core_req typedef-ed to u32 (instead of to u16) - * - two flags added: BASE_JD_REQ_SKIP_CACHE_STAT / _END - * - * 10.4: - * - Removed KBASE_FUNC_EXT_BUFFER_LOCK used only in internal tests */ #define BASE_UK_VERSION_MAJOR 10 -#define BASE_UK_VERSION_MINOR 4 +#define BASE_UK_VERSION_MINOR 1 struct kbase_uk_mem_alloc { union uk_header header; @@ -303,6 +292,16 @@ struct kbase_uk_model_control_params { }; #endif /* SUPPORT_MALI_NO_MALI */ +#define KBASE_MAXIMUM_EXT_RESOURCES 255 + +struct kbase_uk_ext_buff_kds_data { + union uk_header header; + union kbase_pointer external_resource; + union kbase_pointer file_descriptor; + u32 num_res; /* limited to KBASE_MAXIMUM_EXT_RESOURCES */ + u32 padding; +}; + #ifdef BASE_LEGACY_UK8_SUPPORT struct kbase_uk_keep_gpu_powered { union uk_header header; @@ -328,8 +327,8 @@ struct kbase_uk_context_id { int id; }; -#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \ - !defined(MALI_MIPE_ENABLED) +#if (defined(MALI_KTLSTREAM_ENABLED) && MALI_KTLSTREAM_ENABLED) || \ + defined(CONFIG_MALI_MIPE_ENABLED) /** * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure * @header: UK structure header @@ -397,7 +396,7 @@ struct kbase_uk_tlstream_stats { u32 bytes_generated; }; #endif /* MALI_UNIT_TEST */ -#endif /* MALI_MIPE_ENABLED */ +#endif /* MALI_KTLSTREAM_ENABLED */ /** * struct struct kbase_uk_prfcnt_value for the KBASE_FUNC_SET_PRFCNT_VALUES ioctl @@ -412,38 +411,6 @@ struct kbase_uk_prfcnt_values { u32 size; }; -/** - * struct kbase_uk_soft_event_update - User/Kernel space data exchange structure - * @header: UK structure header - * @evt: the GPU address containing the event - * @new_status: the new event status, must be either BASE_JD_SOFT_EVENT_SET or - * BASE_JD_SOFT_EVENT_RESET - * @flags: reserved for future uses, must be set to 0 - * - * This structure is used to update the status of a software event. If the - * event's status is set to BASE_JD_SOFT_EVENT_SET, any job currently waiting - * on this event will complete. - */ -struct kbase_uk_soft_event_update { - union uk_header header; - /* IN */ - u64 evt; - u32 new_status; - u32 flags; -}; - -/** - * struct kbase_uk_mem_jit_init - User/Kernel space data exchange structure - * @header: UK structure header - * @va_pages: Number of virtual pages required for JIT - * - * This structure is used when requesting initialization of JIT. 
- */ -struct kbase_uk_mem_jit_init { - union uk_header header; - /* IN */ - u64 va_pages; -}; enum kbase_uk_function_id { KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0), @@ -471,6 +438,7 @@ enum kbase_uk_function_id { KBASE_FUNC_FIND_CPU_OFFSET = (UK_FUNC_ID + 15), KBASE_FUNC_GET_VERSION = (UK_FUNC_ID + 16), + KBASE_FUNC_EXT_BUFFER_LOCK = (UK_FUNC_ID + 17), KBASE_FUNC_SET_FLAGS = (UK_FUNC_ID + 18), KBASE_FUNC_SET_TEST_DATA = (UK_FUNC_ID + 19), @@ -495,15 +463,15 @@ enum kbase_uk_function_id { KBASE_FUNC_GET_CONTEXT_ID = (UK_FUNC_ID + 31), -#if (defined(MALI_MIPE_ENABLED) && MALI_MIPE_ENABLED) || \ - !defined(MALI_MIPE_ENABLED) +#if (defined(MALI_KTLSTREAM_ENABLED) && MALI_KTLSTREAM_ENABLED) || \ + defined(CONFIG_MALI_MIPE_ENABLED) KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 32), #if MALI_UNIT_TEST KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33), KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34), #endif /* MALI_UNIT_TEST */ KBASE_FUNC_TLSTREAM_FLUSH = (UK_FUNC_ID + 35), -#endif /* MALI_MIPE_ENABLED */ +#endif /* MALI_KTLSTREAM_ENABLED */ KBASE_FUNC_HWCNT_READER_SETUP = (UK_FUNC_ID + 36), @@ -511,10 +479,6 @@ enum kbase_uk_function_id { KBASE_FUNC_SET_PRFCNT_VALUES = (UK_FUNC_ID + 37), #endif - KBASE_FUNC_SOFT_EVENT_UPDATE = (UK_FUNC_ID + 38), - - KBASE_FUNC_MEM_JIT_INIT = (UK_FUNC_ID + 39), - KBASE_FUNC_MAX }; diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c index bd6095f77480f..d3d27e2958d7a 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c +++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,10 +28,8 @@ #include #include -#include #include #include -#include /*****************************************************************************/ @@ -63,14 +61,6 @@ enum { JM_HWCNT_BM }; -enum vinstr_state { - VINSTR_IDLE, - VINSTR_DUMPING, - VINSTR_SUSPENDING, - VINSTR_SUSPENDED, - VINSTR_RESUMING -}; - /** * struct kbase_vinstr_context - vinstr context per device * @lock: protects the entire vinstr context @@ -84,12 +74,7 @@ enum vinstr_state { * with hardware * @reprogram: when true, reprogram hwcnt block with the new set of * counters - * @state: vinstr state - * @state_lock: protects information about vinstr state - * @suspend_waitq: notification queue to trigger state re-validation - * @suspend_cnt: reference counter of vinstr's suspend state - * @suspend_work: worker to execute on entering suspended state - * @resume_work: worker to execute on leaving suspended state + * @suspended: when true, the context has been suspended * @nclients: number of attached clients, pending or otherwise * @waiting_clients: head of list of clients being periodically sampled * @idle_clients: head of list of clients being idle @@ -109,13 +94,7 @@ struct kbase_vinstr_context { size_t dump_size; u32 bitmap[4]; bool reprogram; - - enum vinstr_state state; - struct spinlock state_lock; - wait_queue_head_t suspend_waitq; - unsigned int suspend_cnt; - struct work_struct suspend_work; - struct work_struct resume_work; + bool suspended; u32 nclients; struct list_head waiting_clients; @@ -210,10 +189,7 @@ static const struct file_operations vinstr_client_fops = { static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) { - struct kbase_context *kctx = vinstr_ctx->kctx; - 
struct kbase_device *kbdev = kctx->kbdev; struct kbase_uk_hwcnt_setup setup; - int err; setup.dump_buffer = vinstr_ctx->gpu_va; setup.jm_bm = vinstr_ctx->bitmap[JM_HWCNT_BM]; @@ -221,46 +197,12 @@ static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) setup.shader_bm = vinstr_ctx->bitmap[SHADER_HWCNT_BM]; setup.mmu_l2_bm = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM]; - /* Mark the context as active so the GPU is kept turned on */ - /* A suspend won't happen here, because we're in a syscall from a - * userspace thread. */ - kbase_pm_context_active(kbdev); - - /* Schedule the context in */ - kbasep_js_schedule_privileged_ctx(kbdev, kctx); - err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &setup); - if (err) { - /* Release the context. This had its own Power Manager Active - * reference */ - kbasep_js_release_privileged_ctx(kbdev, kctx); - - /* Also release our Power Manager Active reference */ - kbase_pm_context_idle(kbdev); - } - - return err; + return kbase_instr_hwcnt_enable(vinstr_ctx->kctx, &setup); } static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx) { - struct kbase_context *kctx = vinstr_ctx->kctx; - struct kbase_device *kbdev = kctx->kbdev; - int err; - - err = kbase_instr_hwcnt_disable_internal(kctx); - if (err) { - dev_warn(kbdev->dev, "Failed to disable HW counters (ctx:%p)", - kctx); - return; - } - - /* Release the context. This had its own Power Manager Active reference. */ - kbasep_js_release_privileged_ctx(kbdev, kctx); - - /* Also release our Power Manager Active reference. */ - kbase_pm_context_idle(kbdev); - - dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx); + kbase_instr_hwcnt_disable(vinstr_ctx->kctx); } static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx) @@ -367,10 +309,6 @@ static void kbasep_vinstr_unmap_kernel_dump_buffer( */ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) { - struct kbase_device *kbdev = vinstr_ctx->kbdev; - struct kbasep_kctx_list_element *element; - unsigned long flags; - bool enable_backend = false; int err; vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true); @@ -386,48 +324,10 @@ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) return err; } - /* Add kernel context to list of contexts associated with device. */ - element = kzalloc(sizeof(*element), GFP_KERNEL); - if (element) { - element->kctx = vinstr_ctx->kctx; - mutex_lock(&kbdev->kctx_list_lock); - list_add(&element->link, &kbdev->kctx_list); - - /* Inform timeline client about new context. - * Do this while holding the lock to avoid tracepoint - * being created in both body and summary stream. */ - kbase_tlstream_tl_new_ctx( - vinstr_ctx->kctx, - (u32)(vinstr_ctx->kctx->id), - (u32)(vinstr_ctx->kctx->tgid)); - - mutex_unlock(&kbdev->kctx_list_lock); - } else { - /* Don't treat this as a fail - just warn about it. */ - dev_warn(kbdev->dev, - "couldn't add kctx to kctx_list\n"); - } - - /* Don't enable hardware counters if vinstr is suspended. - * Note that vinstr resume code is run under vinstr context lock, - * lower layer will be enabled as needed on resume. 
*/ - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - if (VINSTR_IDLE == vinstr_ctx->state) - enable_backend = true; - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - if (enable_backend) - err = enable_hwcnt(vinstr_ctx); - + err = enable_hwcnt(vinstr_ctx); if (err) { kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); kbase_destroy_context(vinstr_ctx->kctx); - if (element) { - mutex_lock(&kbdev->kctx_list_lock); - list_del(&element->link); - kfree(element); - mutex_unlock(&kbdev->kctx_list_lock); - } - kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx); vinstr_ctx->kctx = NULL; return err; } @@ -440,13 +340,6 @@ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) disable_hwcnt(vinstr_ctx); kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); kbase_destroy_context(vinstr_ctx->kctx); - if (element) { - mutex_lock(&kbdev->kctx_list_lock); - list_del(&element->link); - kfree(element); - mutex_unlock(&kbdev->kctx_list_lock); - } - kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx); vinstr_ctx->kctx = NULL; return -EFAULT; } @@ -460,34 +353,11 @@ static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx) */ static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx) { - struct kbase_device *kbdev = vinstr_ctx->kbdev; - struct kbasep_kctx_list_element *element; - struct kbasep_kctx_list_element *tmp; - bool found = false; - /* Release hw counters dumping resources. */ vinstr_ctx->thread = NULL; disable_hwcnt(vinstr_ctx); kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx); kbase_destroy_context(vinstr_ctx->kctx); - - /* Remove kernel context from the device's contexts list. */ - mutex_lock(&kbdev->kctx_list_lock); - list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) { - if (element->kctx == vinstr_ctx->kctx) { - list_del(&element->link); - kfree(element); - found = true; - } - } - mutex_unlock(&kbdev->kctx_list_lock); - - if (!found) - dev_warn(kbdev->dev, "kctx not in kctx_list\n"); - - /* Inform timeline client about context destruction. */ - kbase_tlstream_tl_del_ctx(vinstr_ctx->kctx); - vinstr_ctx->kctx = NULL; } @@ -509,10 +379,9 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client( struct kbase_vinstr_client *cli; KBASE_DEBUG_ASSERT(vinstr_ctx); - - if (buffer_count > MAX_BUFFER_COUNT - || (buffer_count & (buffer_count - 1))) - return NULL; + KBASE_DEBUG_ASSERT(buffer_count >= 0); + KBASE_DEBUG_ASSERT(buffer_count <= MAX_BUFFER_COUNT); + KBASE_DEBUG_ASSERT(!(buffer_count & (buffer_count - 1))); cli = kzalloc(sizeof(*cli), GFP_KERNEL); if (!cli) @@ -566,7 +435,7 @@ static struct kbase_vinstr_client *kbasep_vinstr_attach_client( /* Allocate required number of dumping buffers. 
*/ cli->dump_buffers = (char *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, + GFP_KERNEL, get_order(cli->dump_size * cli->buffer_count)); if (!cli->dump_buffers) goto error; @@ -933,7 +802,6 @@ static void kbasep_vinstr_add_dump_request( static int kbasep_vinstr_collect_and_accumulate( struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp) { - unsigned long flags; int rcode; #ifdef CONFIG_MALI_NO_MALI @@ -941,15 +809,6 @@ static int kbasep_vinstr_collect_and_accumulate( gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va); #endif - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - if (VINSTR_IDLE != vinstr_ctx->state) { - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - return -EAGAIN; - } else { - vinstr_ctx->state = VINSTR_DUMPING; - } - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - /* Request HW counters dump. * Disable preemption to make dump timestamp more accurate. */ preempt_disable(); @@ -961,21 +820,6 @@ static int kbasep_vinstr_collect_and_accumulate( rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx); WARN_ON(rcode); - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - switch (vinstr_ctx->state) - { - case VINSTR_SUSPENDING: - schedule_work(&vinstr_ctx->suspend_work); - break; - case VINSTR_DUMPING: - vinstr_ctx->state = VINSTR_IDLE; - wake_up_all(&vinstr_ctx->suspend_waitq); - break; - default: - break; - } - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - /* Accumulate values of collected counters. */ if (!rcode) accum_clients(vinstr_ctx); @@ -1063,20 +907,6 @@ static int kbasep_vinstr_fill_dump_buffer_kernel( static void kbasep_vinstr_reprogram( struct kbase_vinstr_context *vinstr_ctx) { - unsigned long flags; - bool suspended = false; - - /* Don't enable hardware counters if vinstr is suspended. */ - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - if (VINSTR_IDLE != vinstr_ctx->state) - suspended = true; - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - if (suspended) - return; - - /* Change to suspended state is done while holding vinstr context - * lock. Below code will then no re-enable the instrumentation. */ - if (vinstr_ctx->reprogram) { struct kbase_vinstr_client *iter; @@ -1181,7 +1011,6 @@ static int kbasep_vinstr_service_task(void *data) while (!kthread_should_stop()) { struct kbase_vinstr_client *cli = NULL; struct kbase_vinstr_client *tmp; - int rcode; u64 timestamp = kbasep_vinstr_get_timestamp(); u64 dump_time = 0; @@ -1224,8 +1053,7 @@ static int kbasep_vinstr_service_task(void *data) continue; } - rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, - ×tamp); + kbasep_vinstr_collect_and_accumulate(vinstr_ctx, ×tamp); INIT_LIST_HEAD(&expired_requests); @@ -1254,11 +1082,10 @@ static int kbasep_vinstr_service_task(void *data) /* Expect only periodically sampled clients. */ BUG_ON(0 == cli->dump_interval); - if (!rcode) - kbasep_vinstr_update_client( - cli, - timestamp, - BASE_HWCNT_READER_EVENT_PERIODIC); + kbasep_vinstr_update_client( + cli, + timestamp, + BASE_HWCNT_READER_EVENT_PERIODIC); /* Set new dumping time. Drop missed probing times. 
*/ do { @@ -1387,6 +1214,11 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval( mutex_lock(&vinstr_ctx->lock); + if (vinstr_ctx->suspended) { + mutex_unlock(&vinstr_ctx->lock); + return -EBUSY; + } + list_del(&cli->list); cli->dump_interval = interval; @@ -1623,8 +1455,7 @@ static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp, struct vm_area_struct *vma) { struct kbase_vinstr_client *cli; - unsigned long size, addr, pfn, offset; - unsigned long vm_size = vma->vm_end - vma->vm_start; + size_t size; KBASE_DEBUG_ASSERT(filp); KBASE_DEBUG_ASSERT(vma); @@ -1633,24 +1464,14 @@ static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp, KBASE_DEBUG_ASSERT(cli); size = cli->buffer_count * cli->dump_size; - - if (vma->vm_pgoff > (size >> PAGE_SHIFT)) - return -EINVAL; - if (vm_size > size) - return -EINVAL; - - offset = vma->vm_pgoff << PAGE_SHIFT; - if ((vm_size + offset) > size) - return -EINVAL; - - addr = __pa((unsigned long)cli->dump_buffers + offset); - pfn = addr >> PAGE_SHIFT; + if (vma->vm_end - vma->vm_start > size) + return -ENOMEM; return remap_pfn_range( vma, vma->vm_start, - pfn, - vm_size, + __pa((unsigned long)cli->dump_buffers) >> PAGE_SHIFT, + size, vma->vm_page_prot); } @@ -1677,84 +1498,6 @@ static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode, /*****************************************************************************/ -/** - * kbasep_vinstr_kick_scheduler - trigger scheduler cycle - * @kbdev: pointer to kbase device structure - */ -static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev) -{ - struct kbasep_js_device_data *js_devdata = &kbdev->js_data; - unsigned long flags; - - down(&js_devdata->schedule_sem); - spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); - kbase_jm_kick_all(kbdev); - spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); - up(&js_devdata->schedule_sem); -} - -/** - * kbasep_vinstr_suspend_worker - worker suspending vinstr module - * @data: pointer to work structure - */ -static void kbasep_vinstr_suspend_worker(struct work_struct *data) -{ - struct kbase_vinstr_context *vinstr_ctx; - unsigned long flags; - - vinstr_ctx = container_of(data, struct kbase_vinstr_context, - suspend_work); - - mutex_lock(&vinstr_ctx->lock); - - if (vinstr_ctx->kctx) - disable_hwcnt(vinstr_ctx); - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - vinstr_ctx->state = VINSTR_SUSPENDED; - wake_up_all(&vinstr_ctx->suspend_waitq); - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - - mutex_unlock(&vinstr_ctx->lock); - - /* Kick GPU scheduler to allow entering protected mode. - * This must happen after vinstr was suspended. */ - kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev); -} - -/** - * kbasep_vinstr_suspend_worker - worker resuming vinstr module - * @data: pointer to work structure - */ -static void kbasep_vinstr_resume_worker(struct work_struct *data) -{ - struct kbase_vinstr_context *vinstr_ctx; - unsigned long flags; - - vinstr_ctx = container_of(data, struct kbase_vinstr_context, - resume_work); - - mutex_lock(&vinstr_ctx->lock); - - if (vinstr_ctx->kctx) - enable_hwcnt(vinstr_ctx); - - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - vinstr_ctx->state = VINSTR_IDLE; - wake_up_all(&vinstr_ctx->suspend_waitq); - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - - mutex_unlock(&vinstr_ctx->lock); - - /* Kick GPU scheduler to allow entering protected mode. - * Note that scheduler state machine might requested re-entry to - * protected mode before vinstr was resumed. 
- * This must happen after vinstr was release. */ - kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev); -} - -/*****************************************************************************/ - struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev) { struct kbase_vinstr_context *vinstr_ctx; @@ -1766,14 +1509,8 @@ struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev) INIT_LIST_HEAD(&vinstr_ctx->idle_clients); INIT_LIST_HEAD(&vinstr_ctx->waiting_clients); mutex_init(&vinstr_ctx->lock); - spin_lock_init(&vinstr_ctx->state_lock); vinstr_ctx->kbdev = kbdev; vinstr_ctx->thread = NULL; - vinstr_ctx->state = VINSTR_IDLE; - vinstr_ctx->suspend_cnt = 0; - INIT_WORK(&vinstr_ctx->suspend_work, kbasep_vinstr_suspend_worker); - INIT_WORK(&vinstr_ctx->resume_work, kbasep_vinstr_resume_worker); - init_waitqueue_head(&vinstr_ctx->suspend_waitq); atomic_set(&vinstr_ctx->request_pending, 0); init_waitqueue_head(&vinstr_ctx->waitq); @@ -1789,10 +1526,6 @@ void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx) if (vinstr_ctx->thread) kthread_stop(vinstr_ctx->thread); - /* Wait for workers. */ - flush_work(&vinstr_ctx->suspend_work); - flush_work(&vinstr_ctx->resume_work); - while (1) { struct list_head *list = &vinstr_ctx->idle_clients; @@ -1925,6 +1658,11 @@ int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli, mutex_lock(&vinstr_ctx->lock); + if (vinstr_ctx->suspended) { + rcode = -EBUSY; + goto exit; + } + if (event_mask & cli->event_mask) { rcode = kbasep_vinstr_collect_and_accumulate( vinstr_ctx, @@ -1960,6 +1698,11 @@ int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli) mutex_lock(&vinstr_ctx->lock); + if (vinstr_ctx->suspended) { + rcode = -EBUSY; + goto exit; + } + rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused); if (rcode) goto exit; @@ -1976,66 +1719,40 @@ int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli) return rcode; } -int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx) +void kbase_vinstr_hwc_suspend(struct kbase_vinstr_context *vinstr_ctx) { - unsigned long flags; - int ret = -EAGAIN; + u64 unused; KBASE_DEBUG_ASSERT(vinstr_ctx); - spin_lock_irqsave(&vinstr_ctx->state_lock, flags); - switch (vinstr_ctx->state) { - case VINSTR_SUSPENDED: - vinstr_ctx->suspend_cnt++; - /* overflow shall not happen */ - BUG_ON(0 == vinstr_ctx->suspend_cnt); - ret = 0; - break; - - case VINSTR_IDLE: - vinstr_ctx->state = VINSTR_SUSPENDING; - schedule_work(&vinstr_ctx->suspend_work); - break; - - case VINSTR_DUMPING: - vinstr_ctx->state = VINSTR_SUSPENDING; - break; - - case VINSTR_SUSPENDING: - /* fall through */ - case VINSTR_RESUMING: - break; - - default: - BUG(); - break; + mutex_lock(&vinstr_ctx->lock); + if (!vinstr_ctx->nclients || vinstr_ctx->suspended) { + mutex_unlock(&vinstr_ctx->lock); + return; } - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); - - return ret; -} -void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx) -{ - wait_event(vinstr_ctx->suspend_waitq, - (0 == kbase_vinstr_try_suspend(vinstr_ctx))); + kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused); + vinstr_ctx->suspended = true; + vinstr_ctx->suspended_clients = vinstr_ctx->waiting_clients; + INIT_LIST_HEAD(&vinstr_ctx->waiting_clients); + mutex_unlock(&vinstr_ctx->lock); } -void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx) +void kbase_vinstr_hwc_resume(struct kbase_vinstr_context *vinstr_ctx) { - unsigned long flags; - KBASE_DEBUG_ASSERT(vinstr_ctx); - spin_lock_irqsave(&vinstr_ctx->state_lock, 
flags); - BUG_ON(VINSTR_SUSPENDING == vinstr_ctx->state); - if (VINSTR_SUSPENDED == vinstr_ctx->state) { - BUG_ON(0 == vinstr_ctx->suspend_cnt); - vinstr_ctx->suspend_cnt--; - if (0 == vinstr_ctx->suspend_cnt) { - vinstr_ctx->state = VINSTR_RESUMING; - schedule_work(&vinstr_ctx->resume_work); - } + mutex_lock(&vinstr_ctx->lock); + if (!vinstr_ctx->nclients || !vinstr_ctx->suspended) { + mutex_unlock(&vinstr_ctx->lock); + return; } - spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags); + + vinstr_ctx->suspended = false; + vinstr_ctx->waiting_clients = vinstr_ctx->suspended_clients; + vinstr_ctx->reprogram = true; + kbasep_vinstr_reprogram(vinstr_ctx); + atomic_set(&vinstr_ctx->request_pending, 1); + wake_up_all(&vinstr_ctx->waitq); + mutex_unlock(&vinstr_ctx->lock); } diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h index 6207d25aef06c..d32462aec653f 100644 --- a/drivers/gpu/arm/midgard/mali_kbase_vinstr.h +++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -103,39 +103,18 @@ int kbase_vinstr_hwc_dump( int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli); /** - * kbase_vinstr_try_suspend - try suspending operation of a given vinstr context + * kbase_vinstr_hwc_suspend - suspends hardware counter collection for + * a given kbase context * @vinstr_ctx: vinstr context - * - * Return: 0 on success, or negative if state change is in progress - * - * Warning: This API call is non-generic. It is meant to be used only by - * job scheduler state machine. - * - * Function initiates vinstr switch to suspended state. Once it was called - * vinstr enters suspending state. If function return non-zero value, it - * indicates that state switch is not complete and function must be called - * again. On state switch vinstr will trigger job scheduler state machine - * cycle. - */ -int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx); - -/** - * kbase_vinstr_suspend - suspends operation of a given vinstr context - * @vinstr_ctx: vinstr context - * - * Function initiates vinstr switch to suspended state. Then it blocks until - * operation is completed. */ -void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx); +void kbase_vinstr_hwc_suspend(struct kbase_vinstr_context *vinstr_ctx); /** - * kbase_vinstr_resume - resumes operation of a given vinstr context + * kbase_vinstr_hwc_resume - resumes hardware counter collection for + * a given kbase context * @vinstr_ctx: vinstr context - * - * Function can be called only if it was preceded by a successful call - * to kbase_vinstr_suspend. 
*/ -void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx); +void kbase_vinstr_hwc_resume(struct kbase_vinstr_context *vinstr_ctx); /** * kbase_vinstr_dump_size - Return required size of dump buffer @@ -147,7 +126,7 @@ size_t kbase_vinstr_dump_size(struct kbase_device *kbdev); /** * kbase_vinstr_detach_client - Detach a client from the vinstr core - * @cli: pointer to vinstr client + * @cli: Pointer to vinstr client */ void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli); diff --git a/drivers/gpu/arm/midgard/mali_linux_trace.h b/drivers/gpu/arm/midgard/mali_linux_trace.h index 2be06a5527689..fc3cf32ba4d28 100644 --- a/drivers/gpu/arm/midgard/mali_linux_trace.h +++ b/drivers/gpu/arm/midgard/mali_linux_trace.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -15,15 +15,19 @@ + + #if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_MALI_H +#include +#include + #undef TRACE_SYSTEM #define TRACE_SYSTEM mali +#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM) #define TRACE_INCLUDE_FILE mali_linux_trace -#include - #define MALI_JOB_SLOTS_EVENT_CHANGED /** @@ -179,6 +183,24 @@ TRACE_EVENT(mali_total_alloc_pages_change, TP_printk("event=%lld", __entry->event_id) ); +/** + * mali_sw_counter - not currently used + * @event_id: counter id + */ +TRACE_EVENT(mali_sw_counter, + TP_PROTO(unsigned int event_id, signed long long value), + TP_ARGS(event_id, value), + TP_STRUCT__entry( + __field(int, event_id) + __field(long long, value) + ), + TP_fast_assign( + __entry->event_id = event_id; + __entry->value = value; + ), + TP_printk("event %d = %lld", __entry->event_id, __entry->value) +); + #endif /* _TRACE_MALI_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h index de6c206f3c71e..778104b57fa92 100644 --- a/drivers/gpu/arm/midgard/mali_midg_regmap.h +++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. 
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -58,7 +58,7 @@
 #define GPU_COMMAND             0x030   /* (WO) */
 #define GPU_STATUS              0x034   /* (RO) */
-#define LATEST_FLUSH            0x038   /* (RO) */
+

 #define GROUPS_L2_COHERENT      (1 << 0)   /* Cores groups are l2 coherent */

@@ -169,8 +169,6 @@
 #define L2_PWRACTIVE_LO         0x260   /* (RO) Level 2 cache active bitmap, low word */
 #define L2_PWRACTIVE_HI         0x264   /* (RO) Level 2 cache active bitmap, high word */

-#define COHERENCY_FEATURES      0x300   /* (RO) Coherency features present */
-#define COHERENCY_ENABLE        0x304   /* (RW) Coherency enable */

 #define JM_CONFIG               0xF00   /* (RW) Job Manager configuration register (Implementation specific register) */
 #define SHADER_CONFIG           0xF04   /* (RW) Shader core configuration settings (Implementation specific register) */
@@ -214,8 +212,6 @@
 #define JS_AFFINITY_LO         0x10    /* (RO) Core affinity mask for job slot n, low word */
 #define JS_AFFINITY_HI         0x14    /* (RO) Core affinity mask for job slot n, high word */
 #define JS_CONFIG              0x18    /* (RO) Configuration settings for job slot n */
-#define JS_XAFFINITY           0x1C    /* (RO) Extended affinity mask for job
-                                          slot n */

 #define JS_COMMAND             0x20    /* (WO) Command register for job slot n */
 #define JS_STATUS              0x24    /* (RO) Status register for job slot n */
@@ -226,12 +222,9 @@
 #define JS_AFFINITY_NEXT_LO    0x50    /* (RW) Next core affinity mask for job slot n, low word */
 #define JS_AFFINITY_NEXT_HI    0x54    /* (RW) Next core affinity mask for job slot n, high word */
 #define JS_CONFIG_NEXT         0x58    /* (RW) Next configuration settings for job slot n */
-#define JS_XAFFINITY_NEXT      0x5C    /* (RW) Next extended affinity mask for
-                                          job slot n */

 #define JS_COMMAND_NEXT        0x60    /* (RW) Next command register for job slot n */

-#define JS_FLUSH_ID_NEXT       0x70    /* (RW) Next job slot n cache flush ID */

 #define MEMORY_MANAGEMENT_BASE  0x2000
 #define MMU_REG(r)              (MEMORY_MANAGEMENT_BASE + (r))
@@ -273,14 +266,6 @@
 #define AS_STATUS              0x28    /* (RO) Status flags for address space n */

-/* (RW) Translation table configuration for address space n, low word */
-#define AS_TRANSCFG_LO         0x30
-/* (RW) Translation table configuration for address space n, high word */
-#define AS_TRANSCFG_HI         0x34
-/* (RO) Secondary fault address for address space n, low word */
-#define AS_FAULTEXTRA_LO       0x38
-/* (RO) Secondary fault address for address space n, high word */
-#define AS_FAULTEXTRA_HI       0x3C

 /* End Register Offsets */
@@ -308,11 +293,6 @@

 #define AS_TRANSTAB_LPAE_ADRMODE_MASK   0x00000003

-/*
- * Begin AARCH64 MMU TRANSTAB register values
- */
-#define MMU_HW_OUTA_BITS 40
-#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))

 /*
  * Begin MMU STATUS register values
@@ -325,38 +305,12 @@
 #define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3)
 #define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG        (0x3<<3)
-#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3)
-#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3)

 #define AS_FAULTSTATUS_ACCESS_TYPE_MASK   (0x3<<8)
-#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0<<8)
 #define AS_FAULTSTATUS_ACCESS_TYPE_EX     (0x1<<8)
 #define AS_FAULTSTATUS_ACCESS_TYPE_READ   (0x2<<8)
 #define AS_FAULTSTATUS_ACCESS_TYPE_WRITE  (0x3<<8)

-/*
- * Begin MMU TRANSCFG register values
- */
-
-#define AS_TRANSCFG_ADRMODE_LEGACY      0
-#define AS_TRANSCFG_ADRMODE_UNMAPPED    1
-#define AS_TRANSCFG_ADRMODE_IDENTITY    2
-#define AS_TRANSCFG_ADRMODE_AARCH64_4K  6
-#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
-
-#define AS_TRANSCFG_ADRMODE_MASK        0xF
-
-
-/*
- * Begin TRANSCFG register values
- */
-#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24)
-#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24)
-#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24)
-
-#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28))
-#define AS_TRANSCFG_PTW_SH_OS (2 << 28)
-#define AS_TRANSCFG_PTW_SH_IS (3 << 28)

 /*
  * Begin Command Values
@@ -394,15 +348,8 @@
 #define JS_CONFIG_END_FLUSH_NO_ACTION          JS_CONFIG_START_FLUSH_NO_ACTION
 #define JS_CONFIG_END_FLUSH_CLEAN              (1u << 12)
 #define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE   (3u << 12)
-#define JS_CONFIG_ENABLE_FLUSH_REDUCTION       (1u << 14)
-#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK     (1u << 15)
 #define JS_CONFIG_THREAD_PRI(n)                ((n) << 16)

-/* JS_XAFFINITY register values */
-#define JS_XAFFINITY_XAFFINITY_ENABLE  (1u << 0)
-#define JS_XAFFINITY_TILER_ENABLE      (1u << 8)
-#define JS_XAFFINITY_CACHE_ENABLE      (1u << 16)
-
 /* JS_STATUS register values */

 /* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h.
@@ -453,35 +400,19 @@
 #define GPU_COMMAND_CYCLE_COUNT_STOP   0x06 /* Stops the cycle counter, and system timestamp propagation */
 #define GPU_COMMAND_CLEAN_CACHES       0x07 /* Clean all caches */
 #define GPU_COMMAND_CLEAN_INV_CACHES   0x08 /* Clean and invalidate all caches */
-#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */

 /* End Command Values */

 /* GPU_STATUS values */
 #define GPU_STATUS_PRFCNT_ACTIVE           (1 << 2)    /* Set if the performance counters are active. */
-#define GPU_STATUS_PROTECTED_MODE_ACTIVE   (1 << 7)    /* Set if protected mode is active */

 /* PRFCNT_CONFIG register values */
-#define PRFCNT_CONFIG_MODE_SHIFT       0 /* Counter mode position. */
-#define PRFCNT_CONFIG_AS_SHIFT         4 /* Address space bitmap position. */
-#define PRFCNT_CONFIG_SETSELECT_SHIFT  8 /* Set select position. */
-
+#define PRFCNT_CONFIG_AS_SHIFT    4 /* address space bitmap starts from bit 4 of the register */
 #define PRFCNT_CONFIG_MODE_OFF    0 /* The performance counters are disabled. */
 #define PRFCNT_CONFIG_MODE_MANUAL 1 /* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */
 #define PRFCNT_CONFIG_MODE_TILE   2 /* The performance counters are enabled, and are written out each time a tile finishes rendering. */

 /* AS_MEMATTR values: */
-/* Use GPU implementation-defined caching policy. */
-#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
-/* The attribute set to force all resources to be cached. */
-#define AS_MEMATTR_FORCE_TO_CACHE_ALL    0x8Full
-/* Inner write-alloc cache setup, no outer caching */
-#define AS_MEMATTR_WRITE_ALLOC           0x8Dull
-
-/* Set to implementation defined, outer caching */
-#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
-/* Set to write back memory, outer caching */
-#define AS_MEMATTR_AARCH64_OUTER_WA       0x8Dull

 /* Use GPU implementation-defined caching policy. */
 #define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
@@ -526,8 +457,6 @@
 /* End JS_FEATURES register */

 /* L2_MMU_CONFIG register */
-#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT  (23)
-#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY        (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
 #define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT   (24)
 #define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS         (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
 #define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT  (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
@@ -576,4 +505,5 @@

 /* End TILER_CONFIG register */

+
 #endif /* _MIDGARD_REGMAP_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_timeline.h b/drivers/gpu/arm/midgard/mali_timeline.h
index bd5f6614b6bb0..c3563723cb633 100644
--- a/drivers/gpu/arm/midgard/mali_timeline.h
+++ b/drivers/gpu/arm/midgard/mali_timeline.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -386,6 +386,7 @@ TRACE_EVENT(mali_timeline_context_active,
                 __entry->count)
 );

+
 #endif /* _MALI_TIMELINE_H */

 #undef TRACE_INCLUDE_PATH
diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
index 34f6d57382e28..d4813f7f8a358 100644
--- a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
+++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -73,8 +73,8 @@
 extern struct kbase_pm_callback_conf pm_callbacks;

 /**
- * Protected mode switch
+ * Secure mode switch
  *
- * Attached value: pointer to @ref kbase_protected_ops
+ * Attached value: pointer to @ref kbase_secure_ops
  */
-#define PROTECTED_CALLBACKS (NULL)
+#define SECURE_CALLBACKS (NULL)
diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
index c654818109276..3baf3d96d41af 100644
--- a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
+++ b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -66,76 +66,48 @@ struct kbase_pm_callback_conf pm_callbacks = {
 };

 /*
- * Juno Protected Mode integration
+ * Juno Secure Mode integration
  */

 /* SMC Function Numbers */
-#define JUNO_SMC_PROTECTED_ENTER_FUNC 0xff06
-#define JUNO_SMC_PROTECTED_RESET_FUNC 0xff07
+#define JUNO_SMC_SECURE_ENABLE_FUNC  0xff06
+#define JUNO_SMC_SECURE_DISABLE_FUNC 0xff07

-static int juno_protected_mode_enter(struct kbase_device *kbdev)
+static int juno_secure_mode_enable(struct kbase_device *kbdev)
 {
-        /* T62X in SoC detected */
-        u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
-                JUNO_SMC_PROTECTED_ENTER_FUNC, false,
-                0, 0, 0);
-        return ret;
-}
-
-/* TODO: Remove these externs, reset should should be done by the firmware */
-extern void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
-                struct kbase_context *kctx);
-
-extern u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
-                struct kbase_context *kctx);
-
-static int juno_protected_mode_reset(struct kbase_device *kbdev)
-{
-
-        /* T62X in SoC detected */
-        u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
-                JUNO_SMC_PROTECTED_RESET_FUNC, false,
-                0, 0, 0);
-
-        /* TODO: Remove this reset, it should be done by the firmware */
-        kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
-                GPU_COMMAND_HARD_RESET, NULL);
-
-        while ((kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL)
-                & RESET_COMPLETED) != RESET_COMPLETED)
-                ;
+        u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;

-        return ret;
+        if (gpu_id == GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0) &&
+            kbdev->reg_start == 0x2d000000) {
+                /* T62X in SoC detected */
+                u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
+                        JUNO_SMC_SECURE_ENABLE_FUNC, false,
+                        0, 0, 0);
+                return ret;
+        }
+
+        return -EINVAL; /* Not supported */
 }

-static bool juno_protected_mode_supported(struct kbase_device *kbdev)
+static int juno_secure_mode_disable(struct kbase_device *kbdev)
 {
         u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;

-        /*
-         * Protected mode is only supported for the built in GPU
-         * _and_ only if the right firmware is running.
-         *
-         * Given that at init time the GPU is not powered up the
-         * juno_protected_mode_reset function can't be used as
-         * is needs to access GPU registers.
-         * However, although we don't want the GPU to boot into
-         * protected mode we know a GPU reset will be done after
-         * this function is called so although we set the GPU to
-         * protected mode it will exit protected mode before the
-         * driver is ready to run work.
-         */
         if (gpu_id == GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0) &&
-                (kbdev->reg_start == 0x2d000000))
-                return juno_protected_mode_enter(kbdev) == 0;
-
-        return false;
+            kbdev->reg_start == 0x2d000000) {
+                /* T62X in SoC detected */
+                u64 ret = kbase_invoke_smc(SMC_OEN_SIP,
+                        JUNO_SMC_SECURE_DISABLE_FUNC, false,
+                        0, 0, 0);
+                return ret;
+        }
+
+        return -EINVAL; /* Not supported */
 }

-struct kbase_protected_ops juno_protected_ops = {
-        .protected_mode_enter = juno_protected_mode_enter,
-        .protected_mode_reset = juno_protected_mode_reset,
-        .protected_mode_supported = juno_protected_mode_supported,
+struct kbase_secure_ops juno_secure_ops = {
+        .secure_mode_enable = juno_secure_mode_enable,
+        .secure_mode_disable = juno_secure_mode_disable,
 };

 static struct kbase_platform_config versatile_platform_config = {
diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
index ab29e9d9c0a69..5fc6d9e1c312c 100644
--- a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
+++ b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -71,14 +71,14 @@
 #define PLATFORM_FUNCS (NULL)

 /**
- * Protected mode switch
+ * Secure mode switch
  *
- * Attached value: pointer to @ref kbase_protected_ops
+ * Attached value: pointer to @ref kbase_secure_ops
  */
-#define PROTECTED_CALLBACKS (&juno_protected_ops)
+#define SECURE_CALLBACKS (&juno_secure_ops)

 extern struct kbase_pm_callback_conf pm_callbacks;
 #ifdef CONFIG_DEVFREQ_THERMAL
 extern struct devfreq_cooling_ops juno_model_ops;
 #endif
-extern struct kbase_protected_ops juno_protected_ops;
+extern struct kbase_secure_ops juno_secure_ops;
diff --git a/drivers/gpu/arm/midgard/platform/rk/mali_kbase_config_rk.c b/drivers/gpu/arm/midgard/platform/rk/mali_kbase_config_rk.c
old mode 100644
new mode 100755
index bcf7971a5fdb8..9514daf42a8ad
--- a/drivers/gpu/arm/midgard/platform/rk/mali_kbase_config_rk.c
+++ b/drivers/gpu/arm/midgard/platform/rk/mali_kbase_config_rk.c
@@ -14,12 +14,9 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
-#include 
-#include 

 #include "mali_kbase_rk.h"
@@ -42,54 +39,18 @@

 /*---------------------------------------------------------------------------*/

-#ifdef CONFIG_REGULATOR
 static int rk_pm_enable_regulator(struct kbase_device *kbdev);
+
 static void rk_pm_disable_regulator(struct kbase_device *kbdev);
-#else
-static inline int rk_pm_enable_regulator(struct kbase_device *kbdev)
-{
-        return 0;
-}
-static inline void rk_pm_disable_regulator(struct kbase_device *kbdev)
-{
-}
-#endif

 static int rk_pm_enable_clk(struct kbase_device *kbdev);

 static void rk_pm_disable_clk(struct kbase_device *kbdev);

-static int kbase_platform_rk_create_sysfs_files(struct device *dev);
-
-static void kbase_platform_rk_remove_sysfs_files(struct device *dev);
-
 /*---------------------------------------------------------------------------*/

-static void rk_pm_power_off_delay_work(struct work_struct *work)
-{
-        struct rk_context *platform =
-                container_of(to_delayed_work(work), struct rk_context, work);
-        struct kbase_device *kbdev = platform->kbdev;
-
-        if (!platform->is_powered) {
-                D("mali_dev is already powered off.");
-                return;
-        }
-
-        if (pm_runtime_enabled(kbdev->dev)) {
-                D("to put_sync_suspend mali_dev.");
-                pm_runtime_put_sync_suspend(kbdev->dev);
-        }
-
-        rk_pm_disable_regulator(kbdev);
-
-        platform->is_powered = false;
-        KBASE_TIMELINE_GPU_POWER(kbdev, 0);
-}
-
 static int kbase_platform_rk_init(struct kbase_device *kbdev)
 {
-        int ret = 0;
         struct rk_context *platform;

         platform = kzalloc(sizeof(*platform), GFP_KERNEL);
@@ -99,49 +60,14 @@ static int kbase_platform_rk_init(struct kbase_device *kbdev)
         }

         platform->is_powered = false;
-        platform->kbdev = kbdev;
-
-        platform->delay_ms = 200;
-        if (of_property_read_u32(kbdev->dev->of_node, "power-off-delay-ms",
-                                 &platform->delay_ms))
-                W("power-off-delay-ms not available.");
-
-        platform->power_off_wq = create_freezable_workqueue("gpu_power_off_wq");
-        if (!platform->power_off_wq) {
-                E("couldn't create workqueue");
-                return -ENOMEM;
-        }
-        INIT_DEFERRABLE_WORK(&platform->work, rk_pm_power_off_delay_work);
-        platform->utilisation_period = DEFAULT_UTILISATION_PERIOD_IN_MS;
-
-        ret = kbase_platform_rk_create_sysfs_files(kbdev->dev);
-        if (ret) {
-                E("fail to create sysfs_files. ret = %d.", ret);
-                goto EXIT;
-        }

         kbdev->platform_context = (void *)platform;

-        pm_runtime_enable(kbdev->dev);
-EXIT:
-        return ret;
+        return 0;
 }

 static void kbase_platform_rk_term(struct kbase_device *kbdev)
 {
-        struct rk_context *platform =
-                (struct rk_context *)kbdev->platform_context;
-
-        pm_runtime_disable(kbdev->dev);
-        kbdev->platform_context = NULL;
-
-        if (platform) {
-                destroy_workqueue(platform->power_off_wq);
-                platform->is_powered = false;
-                platform->kbdev = NULL;
-                kfree(platform);
-        }
-        kbase_platform_rk_remove_sysfs_files(kbdev->dev);
 }

 struct kbase_platform_funcs_conf platform_funcs = {
@@ -164,21 +90,16 @@
 static int rk_pm_callback_power_on(struct kbase_device *kbdev)
 {
         int ret = 1; /* Assume GPU has been powered off */
         int err = 0;
-        struct rk_context *platform = get_rk_context(kbdev);
-
-        cancel_delayed_work_sync(&platform->work);
-
-        err = rk_pm_enable_clk(kbdev);
-        if (err) {
-                E("failed to enable clk: %d", err);
-                return err;
-        }
+        struct rk_context *platform;
+        platform = (struct rk_context *)kbdev->platform_context;

         if (platform->is_powered) {
-                D("mali_device is already powered.");
+                W("mali_device is already powered.");
                 return 0;
         }

+        D("powering on.");
+
         /* we must enable vdd_gpu before pd_gpu_in_chip. */
         err = rk_pm_enable_regulator(kbdev);
         if (err) {
@@ -202,6 +123,12 @@ static int rk_pm_callback_power_on(struct kbase_device *kbdev)
                 }
         }

+        err = rk_pm_enable_clk(kbdev); /* clk is not relative to pd. */
+        if (err) {
+                E("failed to enable clk: %d", err);
+                return err;
+        }
+
         platform->is_powered = true;
         KBASE_TIMELINE_GPU_POWER(kbdev, 1);
@@ -210,20 +137,47 @@
 static void rk_pm_callback_power_off(struct kbase_device *kbdev)
 {
-        struct rk_context *platform = get_rk_context(kbdev);
+        struct rk_context *platform =
+                (struct rk_context *)kbdev->platform_context;
+
+        if (!platform->is_powered) {
+                W("mali_dev is already powered off.");
+                return;
+        }
+
+        D("powering off.");
+
+        platform->is_powered = false;
+        KBASE_TIMELINE_GPU_POWER(kbdev, 0);

         rk_pm_disable_clk(kbdev);
-        queue_delayed_work(platform->power_off_wq, &platform->work,
-                           msecs_to_jiffies(platform->delay_ms));
+
+        if (pm_runtime_enabled(kbdev->dev)) {
+                pm_runtime_mark_last_busy(kbdev->dev);
+                D("to put_sync_suspend mali_dev.");
+                pm_runtime_put_sync_suspend(kbdev->dev);
+        }
+
+        rk_pm_disable_regulator(kbdev);
 }

 int rk_kbase_device_runtime_init(struct kbase_device *kbdev)
 {
+        pm_runtime_set_autosuspend_delay(kbdev->dev, 200);
+        pm_runtime_use_autosuspend(kbdev->dev);
+
+        /* no need to call pm_runtime_set_active here. */
+
+        D("to enable pm_runtime.");
+        pm_runtime_enable(kbdev->dev);
+
         return 0;
 }

 void rk_kbase_device_runtime_disable(struct kbase_device *kbdev)
 {
+        D("to disable pm_runtime.");
+        pm_runtime_disable(kbdev->dev);
 }

 struct kbase_pm_callback_conf pm_callbacks = {
@@ -250,15 +204,6 @@ int kbase_platform_early_init(void)

 /*---------------------------------------------------------------------------*/

-void kbase_platform_rk_shutdown(struct kbase_device *kbdev)
-{
-        I("to make vdd_gpu enabled for turning off pd_gpu in pm_framework.");
-        rk_pm_enable_regulator(kbdev);
-}
-
-/*---------------------------------------------------------------------------*/
-
-#ifdef CONFIG_REGULATOR
 static int rk_pm_enable_regulator(struct kbase_device *kbdev)
 {
         int ret = 0;
@@ -289,7 +234,6 @@ static void rk_pm_disable_regulator(struct kbase_device *kbdev)
         D("to disable regulator.");
         regulator_disable(kbdev->regulator);
 }
-#endif

 static int rk_pm_enable_clk(struct kbase_device *kbdev)
 {
@@ -317,97 +261,3 @@ static void rk_pm_disable_clk(struct kbase_device *kbdev)
         }
 }

-/*---------------------------------------------------------------------------*/
-
-static ssize_t utilisation_period_show(struct device *dev,
-                                       struct device_attribute *attr,
-                                       char *buf)
-{
-        struct kbase_device *kbdev = dev_get_drvdata(dev);
-        struct rk_context *platform = get_rk_context(kbdev);
-        ssize_t ret = 0;
-
-        ret += snprintf(buf, PAGE_SIZE, "%u\n", platform->utilisation_period);
-
-        return ret;
-}
-
-static ssize_t utilisation_period_store(struct device *dev,
-                                        struct device_attribute *attr,
-                                        const char *buf,
-                                        size_t count)
-{
-        struct kbase_device *kbdev = dev_get_drvdata(dev);
-        struct rk_context *platform = get_rk_context(kbdev);
-        int ret = 0;
-
-        ret = kstrtouint(buf, 0, &platform->utilisation_period);
-        if (ret) {
-                E("invalid input period : %s.", buf);
-                return ret;
-        }
-        D("set utilisation_period to '%d'.", platform->utilisation_period);
-
-        return count;
-}
-
-static ssize_t utilisation_show(struct device *dev,
-                                struct device_attribute *attr,
-                                char *buf)
-{
-        struct kbase_device *kbdev = dev_get_drvdata(dev);
-        struct rk_context *platform = get_rk_context(kbdev);
-        ssize_t ret = 0;
-        unsigned long period_in_us = platform->utilisation_period * 1000;
-        unsigned long total_time;
-        unsigned long busy_time;
-        unsigned long utilisation;
-
-        kbase_pm_reset_dvfs_utilisation(kbdev);
-        usleep_range(period_in_us, period_in_us + 100);
-        kbase_pm_get_dvfs_utilisation(kbdev, &total_time, &busy_time);
-        /* 'devfreq_dev_profile' instance registered to devfreq
-         * also uses kbase_pm_reset_dvfs_utilisation
-         * and kbase_pm_get_dvfs_utilisation.
-         * it's better to cat this file when DVFS is disabled.
-         */
-        D("total_time : %lu, busy_time : %lu.", total_time, busy_time);
-
-        utilisation = busy_time * 100 / total_time;
-        ret += snprintf(buf, PAGE_SIZE, "%ld\n", utilisation);
-
-        return ret;
-}
-
-static DEVICE_ATTR_RW(utilisation_period);
-static DEVICE_ATTR_RO(utilisation);
-
-static int kbase_platform_rk_create_sysfs_files(struct device *dev)
-{
-        int ret = 0;
-
-        ret = device_create_file(dev, &dev_attr_utilisation_period);
-        if (ret) {
-                E("fail to create sysfs file 'utilisation_period'.");
-                goto out;
-        }
-
-        ret = device_create_file(dev, &dev_attr_utilisation);
-        if (ret) {
-                E("fail to create sysfs file 'utilisation'.");
-                goto remove_utilisation_period;
-        }
-
-        return 0;
-
-remove_utilisation_period:
-        device_remove_file(dev, &dev_attr_utilisation_period);
-out:
-        return ret;
-}
-
-static void kbase_platform_rk_remove_sysfs_files(struct device *dev)
-{
-        device_remove_file(dev, &dev_attr_utilisation_period);
-        device_remove_file(dev, &dev_attr_utilisation);
-}
diff --git a/drivers/gpu/arm/midgard/platform/rk/mali_kbase_rk.h b/drivers/gpu/arm/midgard/platform/rk/mali_kbase_rk.h
index e1623806abe8b..a4ebd50415bb5 100644
--- a/drivers/gpu/arm/midgard/platform/rk/mali_kbase_rk.h
+++ b/drivers/gpu/arm/midgard/platform/rk/mali_kbase_rk.h
@@ -17,34 +17,14 @@

 /*---------------------------------------------------------------------------*/

-#define DEFAULT_UTILISATION_PERIOD_IN_MS (100)
-
-/*---------------------------------------------------------------------------*/
-
-/*
+/**
  * struct rk_context - work_context of platform_dependent_part_of_rk.
+ * @is_powered: record the status
+ *      of common_parts calling 'power_on_callback' and 'power_off_callback'.
  */
 struct rk_context {
-        /*
-         * record the status of common_parts calling 'power_on_callback'
-         * and 'power_off_callback'.
-         */
         bool is_powered;
-        struct kbase_device *kbdev;
-        struct workqueue_struct *power_off_wq;
-        struct delayed_work work;
-        unsigned int delay_ms;
-        /* debug only, the period in ms to count gpu_utilisation. */
-        unsigned int utilisation_period;
 };

-/*---------------------------------------------------------------------------*/
-
-static inline struct rk_context *get_rk_context(
-                const struct kbase_device *kbdev)
-{
-        return (struct rk_context *)(kbdev->platform_context);
-}
-
 #endif /* _MALI_KBASE_RK_H_ */
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
index dc4471beae676..6384586371d06 100644
--- a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -24,7 +24,7 @@
  * Attached value: number in kHz
  * Default value: NA
  */
-#define GPU_FREQ_KHZ_MAX kbase_get_platform_max_freq()
+#define GPU_FREQ_KHZ_MAX (5000)

 /**
  * Minimum frequency GPU will be clocked at. Given in kHz.
  * This must be specified as there is no default value.
@@ -32,7 +32,16 @@
  * Attached value: number in kHz
  * Default value: NA
  */
-#define GPU_FREQ_KHZ_MIN kbase_get_platform_min_freq()
+#define GPU_FREQ_KHZ_MIN (5000)
+
+/**
+ * Values used for determining the GPU frequency based on the LogicTile type
+ * Used by the function kbase_get_platform_logic_tile_type
+ */
+#define VE_VIRTEX6_GPU_FREQ_MIN 5000
+#define VE_VIRTEX6_GPU_FREQ_MAX 5000
+#define VE_VIRTEX7_GPU_FREQ_MIN 40000
+#define VE_VIRTEX7_GPU_FREQ_MAX 40000

 /**
  * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
@@ -73,10 +82,10 @@
 #define PLATFORM_FUNCS (NULL)

 /**
- * Protected mode switch
+ * Secure mode switch
  *
- * Attached value: pointer to @ref kbase_protected_ops
+ * Attached value: pointer to @ref kbase_secure_ops
  */
-#define PROTECTED_CALLBACKS (NULL)
+#define SECURE_CALLBACKS (NULL)

 extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
index 15ce2bc5eea5e..687b1a8c04319 100644
--- a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
index 4665f98cbbe48..9bc51f1e2da82 100644
--- a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -52,18 +52,6 @@
 #define IS_SINGLE_BIT_SET(val, pos) (val&(1<