为什么在我的循环链表中 peek return 出乎意料的结果?
Why does peek return unexpected results in my circular linked list?
我正在尝试用 Rust 实现循环链表。我已经阅读了 Too Many Linked Lists 和关于 Rust Pin 类型的多个资源,并且我试图从头开始构建一个循环链表,但我对编译器吐出的内容感到困惑。
所以高级概念是我想要一个链表结构和节点结构定义如下:
use std::pin::Pin;
use std::marker::PhantomPinned;
use std::ptr;
#[derive(Debug)]
struct Node<T> {
value: T,
next: *mut Node<T>,
_pin: PhantomPinned,
}
#[derive(Debug)]
pub struct CircularLinkedList<T> {
last: *mut Node<T>,
size: usize
}
对于第一个被推入列表的节点,我希望 node.next 指向节点结构本身。即我希望在列表中推送新节点的逻辑遵循以下 Java 实现
public void push(Item item) {
Node node = new Node();
node.item = item;
if (isEmpty()) {
last = node;
last.next = last;
} else {
node.next = last.next;
last.next = node;
last = node;
}
size++;
}
到目前为止我的 Rust 实现是:
impl<T> CircularLinkedList<T> where T: std::fmt::Debug {
pub fn new() -> Self {
CircularLinkedList { last: ptr::null_mut(), size: 0 }
}
pub fn enqueue(&mut self, value: T) {
self.size = self.size + 1;
if self.last.is_null() {
let new_last = Node {
value,
next: ptr::null_mut(),
_pin: PhantomPinned,
};
// Pin new node
let mut pinned_new_last = Box::pin(new_last);
// Get raw pointer to new node
let ptr = pinned_new_last.as_ref().get_ref() as *const Node<T> as *mut Node<T>;
// Assign next new_last.next value of 'ptr'
unsafe {
let mut_ref: Pin<&mut Node<T>> = Pin::as_mut(&mut pinned_new_last);
Pin::get_unchecked_mut(mut_ref).next = ptr;
}
self.last = ptr;
}
else {
println!("else");
}
}
pub fn peek(&self) -> Option<&T> {
unsafe {
let a = &(*self.last).value;
Some(a)
}
}
}
在下面的 main() 函数中,我很困惑为什么 peek() 没有 return Some(1)
而 miri 给我一个内存错误
fn main () {
let mut list = LinkedList::CircularLinkedList::new();
list.enqueue(1);
println!("{:?}", list.peek());
// Some(2043) !????????, expecting Some(1)
}
警告:除非您完全理解代码的所有细节,否则不要编写 unsafe
代码! (虽然实验很好)。
你得到了 Use-After-Free。在 enqueue()
的末尾,Box
的析构函数为 pinned_new_last
执行并释放其内存。现在您的链接列表指向已释放的节点。在这种情况下添加 std::mem::forget()
就足够了:
pub fn enqueue(&mut self, value: T) {
self.size = self.size + 1;
if self.last.is_null() {
let new_last = Node {
value,
next: ptr::null_mut(),
_pin: PhantomPinned,
};
// Pin new node
let mut pinned_new_last = Box::pin(new_last);
// Get raw pointer to new node
let ptr = pinned_new_last.as_ref().get_ref() as *const Node<T> as *mut Node<T>;
// Assign next new_last.next value of 'ptr'
unsafe {
let mut_ref: Pin<&mut Node<T>> = Pin::as_mut(&mut pinned_new_last);
Pin::get_unchecked_mut(mut_ref).next = ptr;
}
std::mem::forget(pinned_new_last);
self.last = ptr;
} else {
println!("else");
}
}
当然会泄露内存。您必须注意在析构函数中释放它,例如:
impl<T> Drop for CircularLinkedList<T> {
fn drop(&mut self) {
let mut last = self.last;
let size = self.size;
// Panic safety
self.size = 0;
self.last = std::ptr::null_mut();
for _ in 0..size {
let current = last;
unsafe {
last = (*last).next;
Box::from_raw(current); // Drop it
}
}
}
}
另请注意,虽然存储节点作为*mut
指针是合理的,但实际上变异它是UB派生自共享引用,所以这确实是一种代码味道。
我花了一些时间来了解有关 Rust 的更多信息,并且能够提出以下不会引发任何 miri 错误的实现
pub mod LinkedList {
use std::marker::PhantomPinned;
use std::ptr;
use std::mem::ManuallyDrop;
// _pin -> make the struct implement !Unpin, and make it pinnable
#[derive(Debug)]
struct Node<T> where T: std::fmt::Debug {
value: T,
next: *mut Node<T>,
_pin: PhantomPinned,
}
#[derive(Debug)]
pub struct CircularLinkedList<T> where T: std::fmt::Debug {
last: *mut Node<T>,
size: usize
}
impl<T> CircularLinkedList<T> where T: std::fmt::Debug {
pub fn new() -> Self {
CircularLinkedList { last: ptr::null_mut(), size: 0 }
}
pub fn enqueue(&mut self, value: T) {
self.size = self.size + 1;
// Create a new Node struct, assign it to new_last variable
// Struct - no guarantees on data layout
let new_last = Node {
value,
next: ptr::null_mut(),
_pin: PhantomPinned,
};
// Construct a new <Pin<Box<T>>>
// new_last is pinned in heap, and can't move from this address
// pinned_new_last is Pin<Box<T>> type - a smart pointer
// Wrap in ManuallyDrop - 0-cost wrapper that stop compiler from calling destructor for pinned_new_last
// When Pin<Box<T>> dropped, both the pointer and pointee dropped, so need to clean this up in the drop
let mut pinned_new_last = ManuallyDrop::new(Box::pin(new_last));
if self.last.is_null() {
unsafe {
// as_mut gets Pin<Box<Node>> => Pin<&mut Node>
// get_unchecked_mut gets Pin<&mut Node> => &mut Node
// We coerce it into a *mut T
pinned_new_last.as_mut().get_unchecked_mut().next = pinned_new_last.as_mut().get_unchecked_mut() as *mut Node<T>;
self.last = pinned_new_last.as_mut().get_unchecked_mut() as *mut Node<T>;
}
} else {
unsafe {
pinned_new_last.as_mut().get_unchecked_mut().next = (*self.last).next;
(*self.last).next = pinned_new_last.as_mut().get_unchecked_mut() as *mut Node<T>;
self.last = pinned_new_last.as_mut().get_unchecked_mut() as *mut Node<T>;
}
}
}
pub fn dequeue(&mut self) -> Option<T> {
if self.last.is_null() {None}
else {
self.size = self.size - 1;
unsafe {
// This fixed the memory leak!!!
// Let Box destructor call destructor of T and free allocated memory
let last = Box::from_raw(self.last);
if self.size == 0 {self.last = ptr::null_mut();}
else {self.last = last.next;}
let value = (*last).value;
Some(value)
}
}
}
pub fn peek(&self) -> Option<&T> {
if self.last.is_null() {None}
else {
unsafe {
// Deref self.last (it is a *mut Node)
// Get value field
// Cast entire thing to &
Some(&(*self.last).value)
}
}
}
}
impl<T> Drop for CircularLinkedList<T> where T: std::fmt::Debug {
fn drop(&mut self) {
while let Some(_) = self.dequeue() {}
}
}
}
fn main () {
let mut list = LinkedList::CircularLinkedList::new();
list.enqueue(1);
list.enqueue(2);
list.dequeue();
list.dequeue();
println!("{:?}", list.peek());
}
我正在尝试用 Rust 实现循环链表。我已经阅读了 Too Many Linked Lists 和关于 Rust Pin 类型的多个资源,并且我试图从头开始构建一个循环链表,但我对编译器吐出的内容感到困惑。
所以高级概念是我想要一个链表结构和节点结构定义如下:
use std::pin::Pin;
use std::marker::PhantomPinned;
use std::ptr;
#[derive(Debug)]
struct Node<T> {
value: T,
next: *mut Node<T>,
_pin: PhantomPinned,
}
#[derive(Debug)]
pub struct CircularLinkedList<T> {
last: *mut Node<T>,
size: usize
}
对于第一个被推入列表的节点,我希望 node.next 指向节点结构本身。即我希望在列表中推送新节点的逻辑遵循以下 Java 实现
public void push(Item item) {
Node node = new Node();
node.item = item;
if (isEmpty()) {
last = node;
last.next = last;
} else {
node.next = last.next;
last.next = node;
last = node;
}
size++;
}
到目前为止我的 Rust 实现是:
impl<T> CircularLinkedList<T> where T: std::fmt::Debug {
pub fn new() -> Self {
CircularLinkedList { last: ptr::null_mut(), size: 0 }
}
pub fn enqueue(&mut self, value: T) {
self.size = self.size + 1;
if self.last.is_null() {
let new_last = Node {
value,
next: ptr::null_mut(),
_pin: PhantomPinned,
};
// Pin new node
let mut pinned_new_last = Box::pin(new_last);
// Get raw pointer to new node
let ptr = pinned_new_last.as_ref().get_ref() as *const Node<T> as *mut Node<T>;
// Assign next new_last.next value of 'ptr'
unsafe {
let mut_ref: Pin<&mut Node<T>> = Pin::as_mut(&mut pinned_new_last);
Pin::get_unchecked_mut(mut_ref).next = ptr;
}
self.last = ptr;
}
else {
println!("else");
}
}
pub fn peek(&self) -> Option<&T> {
unsafe {
let a = &(*self.last).value;
Some(a)
}
}
}
在下面的 main() 函数中,我很困惑为什么 peek() 没有 return Some(1)
而 miri 给我一个内存错误
fn main () {
let mut list = LinkedList::CircularLinkedList::new();
list.enqueue(1);
println!("{:?}", list.peek());
// Some(2043) !????????, expecting Some(1)
}
警告:除非您完全理解代码的所有细节,否则不要编写 unsafe
代码! (虽然实验很好)。
你得到了 Use-After-Free。在 enqueue()
的末尾,Box
的析构函数为 pinned_new_last
执行并释放其内存。现在您的链接列表指向已释放的节点。在这种情况下添加 std::mem::forget()
就足够了:
pub fn enqueue(&mut self, value: T) {
self.size = self.size + 1;
if self.last.is_null() {
let new_last = Node {
value,
next: ptr::null_mut(),
_pin: PhantomPinned,
};
// Pin new node
let mut pinned_new_last = Box::pin(new_last);
// Get raw pointer to new node
let ptr = pinned_new_last.as_ref().get_ref() as *const Node<T> as *mut Node<T>;
// Assign next new_last.next value of 'ptr'
unsafe {
let mut_ref: Pin<&mut Node<T>> = Pin::as_mut(&mut pinned_new_last);
Pin::get_unchecked_mut(mut_ref).next = ptr;
}
std::mem::forget(pinned_new_last);
self.last = ptr;
} else {
println!("else");
}
}
当然会泄露内存。您必须注意在析构函数中释放它,例如:
impl<T> Drop for CircularLinkedList<T> {
fn drop(&mut self) {
let mut last = self.last;
let size = self.size;
// Panic safety
self.size = 0;
self.last = std::ptr::null_mut();
for _ in 0..size {
let current = last;
unsafe {
last = (*last).next;
Box::from_raw(current); // Drop it
}
}
}
}
另请注意,虽然存储节点作为*mut
指针是合理的,但实际上变异它是UB派生自共享引用,所以这确实是一种代码味道。
我花了一些时间来了解有关 Rust 的更多信息,并且能够提出以下不会引发任何 miri 错误的实现
pub mod LinkedList {
use std::marker::PhantomPinned;
use std::ptr;
use std::mem::ManuallyDrop;
// _pin -> make the struct implement !Unpin, and make it pinnable
#[derive(Debug)]
struct Node<T> where T: std::fmt::Debug {
value: T,
next: *mut Node<T>,
_pin: PhantomPinned,
}
#[derive(Debug)]
pub struct CircularLinkedList<T> where T: std::fmt::Debug {
last: *mut Node<T>,
size: usize
}
impl<T> CircularLinkedList<T> where T: std::fmt::Debug {
pub fn new() -> Self {
CircularLinkedList { last: ptr::null_mut(), size: 0 }
}
pub fn enqueue(&mut self, value: T) {
self.size = self.size + 1;
// Create a new Node struct, assign it to new_last variable
// Struct - no guarantees on data layout
let new_last = Node {
value,
next: ptr::null_mut(),
_pin: PhantomPinned,
};
// Construct a new <Pin<Box<T>>>
// new_last is pinned in heap, and can't move from this address
// pinned_new_last is Pin<Box<T>> type - a smart pointer
// Wrap in ManuallyDrop - 0-cost wrapper that stop compiler from calling destructor for pinned_new_last
// When Pin<Box<T>> dropped, both the pointer and pointee dropped, so need to clean this up in the drop
let mut pinned_new_last = ManuallyDrop::new(Box::pin(new_last));
if self.last.is_null() {
unsafe {
// as_mut gets Pin<Box<Node>> => Pin<&mut Node>
// get_unchecked_mut gets Pin<&mut Node> => &mut Node
// We coerce it into a *mut T
pinned_new_last.as_mut().get_unchecked_mut().next = pinned_new_last.as_mut().get_unchecked_mut() as *mut Node<T>;
self.last = pinned_new_last.as_mut().get_unchecked_mut() as *mut Node<T>;
}
} else {
unsafe {
pinned_new_last.as_mut().get_unchecked_mut().next = (*self.last).next;
(*self.last).next = pinned_new_last.as_mut().get_unchecked_mut() as *mut Node<T>;
self.last = pinned_new_last.as_mut().get_unchecked_mut() as *mut Node<T>;
}
}
}
pub fn dequeue(&mut self) -> Option<T> {
if self.last.is_null() {None}
else {
self.size = self.size - 1;
unsafe {
// This fixed the memory leak!!!
// Let Box destructor call destructor of T and free allocated memory
let last = Box::from_raw(self.last);
if self.size == 0 {self.last = ptr::null_mut();}
else {self.last = last.next;}
let value = (*last).value;
Some(value)
}
}
}
pub fn peek(&self) -> Option<&T> {
if self.last.is_null() {None}
else {
unsafe {
// Deref self.last (it is a *mut Node)
// Get value field
// Cast entire thing to &
Some(&(*self.last).value)
}
}
}
}
impl<T> Drop for CircularLinkedList<T> where T: std::fmt::Debug {
fn drop(&mut self) {
while let Some(_) = self.dequeue() {}
}
}
}
fn main () {
let mut list = LinkedList::CircularLinkedList::new();
list.enqueue(1);
list.enqueue(2);
list.dequeue();
list.dequeue();
println!("{:?}", list.peek());
}