@inproceedings{KumVya14A,
	title = {Cache Design for Mixed Critical Real-Time Systems},
	booktitle = {Proceedings of the International Conference on Computer Design (ICCD)},
	year = {2014},
	month = oct,
	abstract = {Shared caches in mixed criticality systems are a source of interference for safety critical tasks. Shared memory not only leads to worst-case execution time (WCET) pessimism, but also affects the response time of safety critical tasks. In this paper, we present a criticality aware cache design which implements a Least Critical (LC) cache replacement policy, where a least recently used non-critical cache line is replaced during a cache miss. The cache acts as a Least Recently Used (LRU) cache if there are no critical lines or if all cache lines are critical in a set. In our design, data within a certain address space is given higher preference in the cache. These critical address spaces are configured using critical address range (CAR) registers. The new cache design was implemented in a Leon3 processor core, a 32bit processor compliant with the SPARC V8 architecture. Experimental results are presented that illustrate the impact of the Least Critical cache replacement policy on the response time of critical tasks, and on overall application performance as compared to a conventional LRU cache policy.},
	author = {Kumar, Chetan and Vyas, Sudhanshu and Cytron, Ron and Gill, Christopher and Zambreno, Joseph and Jones, Phillip}
}
@article{VyaKum13A,
	title = {An {FPGA}-based Plant-on-Chip Platform for Cyber-Physical System Analysis},
	journal = {IEEE Embedded Systems Letters (ESL)},
	volume = {6},
	number = {1},
	year = {2014},
	pages = {4--7},
	abstract = {Digital control systems are traditionally designed independent of their implementation platform, assuming constant sensor sampling rates and processor response times. Applications are deployed to processors that are shared amongst control and non-control tasks, to maximize resource utilization. This potentially overlooks that computing mechanisms meant for improving average CPU usage, such as cache, interrupts, and task management through schedulers, contribute to non-deterministic interference between tasks. This response time jitter can result in reduced system stability, motivating further study by both the controls and computing communities to maximize CPU utilization, while maintaining physical system stability needs. In this paper, we describe an FPGA-based embedded software platform coupled with a hardware plant emulator (as opposed to purely software-based simulations or hardware-in-the-loop setups) that forms a basis for safe and accurate analysis of Cyber-Physical Systems. We model and analyze an inverted pendulum to demonstrate that our setup can provide a significantly more accurate representation of a real system.},
	author = {Vyas, Sudhanshu and Kumar, Chetan and Zambreno, Joseph and Gill, Christopher and Cytron, Ron and Jones, Phillip}
}
@article{KumVya13B,
	title = {Hardware-Software Architecture for Priority Queue Management in Real-time and Embedded Systems},
	journal = {International Journal of Embedded Systems (IJES)},
	volume = {6},
	number = {4},
	year = {2014},
	pages = {319--334},
	abstract = {The use of hardware-based data structures for accelerating real-time and embedded system applications is limited by the scarceness of hardware resources. By their nature, being limited by the silicon area available, hardware data structures cannot scale in size as easily as their software counterparts. We assert a hardware-software co-design approach is required to elegantly overcome these limitations. In this paper, we present a hybrid priority queue architecture that includes a hardware accelerated binary heap that can also be managed in software when its queue size exceeds hardware limits. A memory mapped interface provides software with access to priority-queue-structured on-chip memory, which enables quick and low overhead transitions between hardware and software management. As an application of this hybrid architecture, we present a scalable task scheduler for real-time systems that reduces scheduler processing overhead and improves timing determinism of the scheduler.},
	author = {Kumar, Chetan and Vyas, Sudhanshu and Cytron, Ron and Gill, Christopher and Zambreno, Joseph and Jones, Phillip}
}
@article{VyaGup13A,
	title = {Hardware Architectural Support for Control Systems and Sensor Processing},
	journal = {ACM Transactions on Embedded Computing Systems (TECS)},
	volume = {13},
	number = {2},
	year = {2013},
	abstract = {The field of modern control theory and the systems used to implement these controls have shown rapid development over the last 50 years. It was often the case that those developing control algorithms could assume the computing medium was solely dedicated to the task of controlling a plant. For example, the control algorithm being implemented in software on a dedicated digital signal processor (DSP), or implemented in hardware using a simple dedicated programmable logic device (PLD). As time progressed, the drive to place more system functionality in a single component (reducing power, cost, and increasing reliability) has made this assumption less often true. Thus, it has been pointed out by some experts in the field of control theory (e.g. Astrom) that those developing control algorithms must take into account the effects of running their algorithms on systems that will be shared with other tasks. One aspect of the work presented in this article is a hardware architecture that allows control developers to maintain this simplifying assumption. We focus specifically on the proportional-integral-derivative (PID) controller. An on-chip coprocessor has been implemented that can scale to support servicing hundreds of plants, while maintaining microsecond level response times, tight deterministic control loop timing, and allows the main processor to service non-control tasks.

In order to control a plant, the controller needs information about the plant{\textquoteright}s state. Typically this information is obtained from sensors with which the plant has been instrumented. There are a number of common computations that may be performed on this sensor data before being presented to the controller (e.g. averaging and thresholding). Thus in addition to supporting PID algorithms, we have developed a sensor processing unit (SPU) that off-loads these common sensor processing tasks from the main processor.

We have prototyped our ideas using Field Programmable Gate Array (FPGA) technology. Through our experimental results, we show our PID execution unit gives orders of magnitude improvement in response time when servicing many plants, as compared to a standard general software implementation. We also show that the SPU scales much better than a general software implementation. In addition, these execution units allow the simplifying assumption of dedicated computing medium to hold for control algorithm development.},
	author = {Vyas, Sudhanshu and Gupte, Adwait and Gill, Christopher and Cytron, Ron and Zambreno, Joseph and Jones, Phillip}
}
@inproceedings{KumVya13A,
	title = {Scheduling Challenges in Mixed Critical Real-time Heterogeneous Computing Platforms},
	booktitle = {Proceedings of Dynamic Data Driven Application Systems (DDDAS)},
	year = {2013},
	month = jun,
	abstract = {In Dynamic Data-Driven Application Systems (DDDAS), applications must dynamically adapt their behavior in response to objectives and conditions that change while deployed. Often these applications may be safety critical or tightly resource constrained, with a need for graceful degradation when introduced to unexpected conditions. This paper begins by motivating and providing a vision for a dynamically adaptable mixed critical computing platform to support DDDAS applications. We then specifically focus on the need for advancements in task models and scheduling algorithms to manage the resources of such a platform. We discuss the shortcomings of existing task models for capturing important attributes of our envisioned computing platform, and identify challenges that must be addressed when developing scheduling algorithms that act upon our proposed extended task model.},
	author = {Kumar, Chetan and Vyas, Sudhanshu and Cytron, Ron and Gill, Christopher and Zambreno, Joseph and Jones, Phillip}
}
@inproceedings{KumVya12A,
	title = {Improving System Predictability and Performance via Hardware Accelerated Data Structures},
	booktitle = {Proceedings of Dynamic Data Driven Application Systems (DDDAS)},
	year = {2012},
	month = jun,
	abstract = {In Dynamic Data-Driven Application Systems, applications must dynamically adapt their behavior in response to objectives and conditions that change while deployed. One approach to achieve dynamic adaptation is to offer middleware that facilitates component migration between modalities in response to such dynamic changes. The triggering, planning, and cost evaluation of adaptation takes place within a scheduler. Scheduling overhead is a major limiting factor for implementing dynamic scheduling algorithms with high frequency timer-tick resolution in real time systems. In this paper, we present a scalable hardware scheduler architecture for real time systems that reduces processing overhead and improves timing predictability of the scheduler. A new hardware priority queue design is presented, which supports insertions in constant time, and removals in O(log n) time. The hardware scheduler supports three (Rate Monotonic Scheduling (RMS), Earliest Deadline First (EDF), priority based) scheduling algorithms, which can be configured during run-time. The interface to the scheduler is provided through a set of custom instructions as an extension to the processor's instruction set architecture. We also report on our experience migrating between two implementations of an ordered-set implementation, with the goal of providing predictable performance for real-time applications.},
	author = {Kumar, Chetan and Vyas, Sudhanshu and Jonathan Shidal and Cytron, Ron and Gill, Christopher and Zambreno, Joseph and Jones, Phillip}
}