Rust Pointer
Table of Contents
1. Rust Pointer
1.1. slice
struct Test(i64, i64); fn format_of_string() -> () { println!("------\nformat of String:"); let s = "abcde".to_owned(); println!("size: {:?}", std::mem::size_of_val(&s)); let pp = &s as *const _ as *const [i64; 3]; unsafe { println!("string addr is {:?}", pp); println!( "string value is 0x{:x} 0x{:x} 0x{:x}", (*pp)[0], (*pp)[1], (*pp)[2], ); let p_heap = (*pp)[0] as *const [u8; 6]; println!("heap value is {:?}", *p_heap); } } fn format_of_str() -> () { println!("------\nformat of &str:"); let orig = "abc".to_owned(); let s = &orig[1..]; println!("size: {:?}", std::mem::size_of_val(&s)); let pp = &s as *const _ as *const [i64; 2]; let pp_orig = &orig as *const _ as *const [i64; 3]; unsafe { println!("string addr is {:?}", pp_orig); println!( "string value is 0x{:x} 0x{:x} 0x{:x}", (*pp_orig)[0], (*pp_orig)[1], (*pp_orig)[2], ); println!("str addr is {:?}", pp); println!( "str value is 0x{:x} 0x{:x}", (*pp)[0], (*pp)[1], ); } } fn main() { format_of_string(); format_of_str(); }
------ format of String: size: 24 string addr is 0x7ffc65d77828 string value is 0x5 0x5cb326830b80 0x5
- String 本身位于栈上, 格式为 `[pointer_to_heap, size, capacity]`, pointer_to_heap 指向堆, 保存 String 真正的内容
- &str 格式为 `[pointer_to_heap, size]`, 它直接指向 heap + slice_offset, 而不是指向栈上的 String 对象
所以 &str 以及 &slice 中的 & 操作并非直接取地址, 它们`生成`一个 `fat pointer`,且 pointer 会指向真正的 payload 而不是栈上的 String 或 array.
另外, `fat pointer` 中的 size 用来确保运行时访问不越界
1.2. Box
1.2.1. 内部原理
struct Test(i64, i64); fn main() { let mut p = Box::new(Test(1234, 5678)); println!("size: {:?}", std::mem::size_of_val(&p)); let pp = &p as *const _ as *const [i64; 1]; unsafe { println!("box addr is {:?}", pp); println!("box value is 0x{:x}", (*pp)[0]); let p_heap = (*pp)[0] as *const [i64; 2]; println!("heap value is {:?}", *p_heap); } }
size: 8 box addr is 0x7ffddf26ba58 box value is 0x56b77c0bcb80 heap value is [1234, 5678]
Box 自身分配在栈上, 它只包含一个指向堆的地址 (String 除了地址还包括 size 和 capacity), payload 实际在堆上.
1.2.2. Box 使用
1.2.2.1. 使用 reference 实现链表
#[derive(Debug)] struct ListNode<'a> { val: i32, next: Option<&'a ListNode<'a>>, } impl<'a> ListNode<'a> { fn new(val: i32) -> (ListNode<'a>) { return ListNode { val: val, next: None, }; } } #[derive(Debug)] struct LinkedList<'a> { root: Option<&'a ListNode<'a>>, } impl<'a> LinkedList<'a> { fn new() -> LinkedList<'a> { return LinkedList { root: None }; } fn add_node(&mut self, node: &'a mut ListNode<'a>) -> () { node.next = self.root; self.root = Some(node); } } fn main() { let mut list = LinkedList::new(); let mut node1 = ListNode::new(1); let mut node2 = ListNode::new(2); let mut node3 = ListNode::new(3); list.add_node(&mut node1); list.add_node(&mut node2); list.add_node(&mut node3); println!("{:?}", list); }
LinkedList { root: Some(ListNode { val: 3, next: Some(ListNode { val: 2, next: Some(ListNode { val: 1, next: None }) }) }) }
这种实现的问题是:
- 由于 node 栈上分配的原因, struct ListNode 定义时无法指定 next 为 TreeNode, 因为递归定义的结构体无法在编译时确定大小, 只能定义 next 为 reference
- 需要指定许多 lifetime annotation 确定 reference 有效
- next 只是引用, 所以 parent 并不是 node 的 owner, 需要额外的 owner (node1, node2, node3) 确保 reference 有效
- 由于栈上分配的原因, 无法把封装在一个函数中. 因为 shadow copy 时不会 copy reference
1.2.2.2. 使用 Box 实现链表
#[derive(Debug)] struct ListNode { val: i32, next: Option<Box<ListNode>>, } impl ListNode { fn new(val: i32) -> Box<ListNode> { Box::new(ListNode { val: val, next: None, }) } } #[derive(Debug)] struct LinkedList { root: Option<Box<ListNode>>, } impl LinkedList { fn new() -> LinkedList { LinkedList { root: None } } fn push_front(&mut self, mut node: Box<ListNode>) -> () { let t = std::mem::replace(&mut self.root, None); node.next = t; self.root = Some(node); } fn push_back(&mut self, node: Box<ListNode>) -> () { let mut fake_root = ListNode::new(0); let t = std::mem::replace(&mut self.root, None); fake_root.next = t; let mut head = &mut fake_root; while let Some(ref mut n) = head.next { head = n } head.next = Some(node); self.root = fake_root.next; } fn pop_front(&mut self) -> () { if let Some(ref mut node) = self.root { self.root = std::mem::replace(&mut node.next, None);; } } } fn main() { let mut list = LinkedList::new(); list.push_front(ListNode::new(1)); list.push_back(ListNode::new(2)); list.pop_front(); println!("{:?}", list); }
LinkedList { root: Some(ListNode { val: 2, next: None }) }
使用 Box 无需考虑 owner 的问题, 但还是需要处理用 reference 和 mem::replace 来防止 Box 被 move,
1.2.3. Box 的应用场景
- 递归定义的数据结构
- 堆上分配
单一 owner 可以搞定的问题, 例如单向链表 (双向链表和树, 图等可能难以用 Box 处理)
let node = TreeNode::new(1); prev.next = node; // node 已经被 move 到 prev.next, 无法再赋值给 next.prev next.prev = node;
1.3. Rc
rust 编译时只允许单一 owner, 通过 Rc 可以支持双向链表等需要多个 owner 的场景.
Rc 即 reference counting, 多个不同的 Rc 可以在内部 `own` 同一个资源, `运行时` 通过引用计数的方式决定何时释放资源.
在编译时一个 Rc 还是只有一个 owner, 编译器并不知道多个 Rc `own` 同一个资源.
1.3.1. 内部原理
Box 的 clone 会复制所有数据, 包含堆上的数据
#[derive(Clone)] struct Test(i64, i64); fn main() { let p = Box::new(Test(1234, 5678)); println!("box size: {:?}", std::mem::size_of_val(&p)); let pp = &p as *const _ as *const [i64; 1]; unsafe { println!("box addr is {:?}", pp); println!("box value is 0x{:x}", (*pp)[0]); let p_heap = (*pp)[0] as *const [i64; 2]; println!("heap value is {:?}", *p_heap); } let p_cloned = p.clone(); println!("rc size: {:?}", std::mem::size_of_val(&p_cloned)); let pp_cloned = &p_cloned as *const _ as *const [i64; 1]; unsafe { println!("cloned box addr is {:?}", pp_cloned); println!("cloned box value is 0x{:x}", (*pp_cloned)[0]); let p_heap = (*pp_cloned)[0] as *const [i64; 2]; println!("heap value is {:?}", *p_heap); } }
box size: 8 box addr is 0x7ffe6084c258 box value is 0x56ac7639db80 heap value is [1234, 5678] rc size: 8 cloned box addr is 0x7ffe6084c3b8 cloned box value is 0x56ac7639dba0 heap value is [1234, 5678]
Rc 的 clone 只是 shadow copy
#[derive(Clone)] struct Test(i64, i64); use std::rc::Rc; fn main() { let p = Rc::new(Test(1234, 5678)); let pp = &p as *const _ as *const [i64; 2]; unsafe { println!("rc addr is {:?}", pp); println!("rc value is 0x{:x}", (*pp)[0]); let p_heap = (*pp)[0] as *const [i64; 4]; println!("heap value is {:?}", *p_heap); } println!("strong_count: {:?}", Rc::strong_count(&p)); println!(""); let p_cloned = p.clone(); let pp_cloned = &p_cloned as *const _ as *const [i64; 1]; unsafe { println!("cloned box addr is {:?}", pp_cloned); println!("cloned box value is 0x{:x}", (*pp_cloned)[0]); let p_heap = (*pp_cloned)[0] as *const [i64; 4]; println!("heap value is {:?}", *p_heap); } println!("strong_count: {:?}", Rc::strong_count(&p)); println!("{:?}", Rc::into_raw(p)); }
rc addr is 0x7ffc48b8eeb0 rc value is 0x601c2a5e3ae0 heap value is [1, 1, 1234, 5678] strong_count: 1 cloned box addr is 0x7ffc48b8f040 cloned box value is 0x601c2a5e3ae0 heap value is [2, 1, 1234, 5678] strong_count: 2 0x601c2a5e3af0
Rc 不是 fat pointer, 它只包含一个堆上的地址, 真正的数据以及引用计数等放在堆上.
1.3.2. 使用 Rc 实现链表
使用 Rc 实现链表并不比 Box 简单, 主要原因是, 在遍历 list 时, 可以这样:
let mut head = fake_root.clone(); while let Some(n) = head.next.clone() { head = n; } Rc::get_mut(&mut head).unwrap().next = Some(node); self.root = fake_root.next.clone();
问题是 Rc 只有在存在一个 owner 时才可以通过 get_mut 来修改, 类似于读写锁的限制, 导致我们在遍历时实际上无法修改, 因为至少有两个 owner 存在.
1.4. RefCell
RefCell 并不是指针类型, 和 owner 机制无关. 它只是一个 wrapper, 使非 mutable 的变量也可以在运行时被修改.
因为 Rc 使得可以有多个 owner, 但这时却难以对数据进行修改, 通过 RefCell, 可以方便的修改数据.
RefCell 通过 borrow() 和 borrow_mut() 实现了运行时的 `读写锁` 而非 Rc 那种编译时
use std::cell::RefCell; use std::rc::Rc; #[derive(Debug)] struct ListNode { val: i32, next: Option<Rc<RefCell<ListNode>>>, } impl ListNode { fn new(val: i32) -> (Rc<RefCell<ListNode>>) { return Rc::new(RefCell::new(ListNode { val: val, next: None, })); } } #[derive(Debug)] struct LinkedList { root: Option<Rc<RefCell<ListNode>>>, } impl LinkedList { fn new() -> LinkedList { return LinkedList { root: None }; } fn push_front(&mut self, node: Rc<RefCell<ListNode>>) -> () { node.borrow_mut().next = self.root.clone(); self.root = Some(node); } fn push_back(&mut self, node: Rc<RefCell<ListNode>>) -> () { let mut fake_root = ListNode::new(0); fake_root.borrow_mut().next = self.root.clone(); let mut head = fake_root.clone(); while let Some(n) = head.clone().borrow().next.clone() { head = n; } head.borrow_mut().next = Some(node); self.root = fake_root.borrow().next.clone(); } } fn main() { let mut list = LinkedList::new(); list.push_front(ListNode::new(1)); list.push_back(ListNode::new(2)); println!("{:?}", list); }
LinkedList { root: Some(RefCell { value: ListNode { val: 1, next: Some(RefCell { value: ListNode { val: 2, next: None } }) } }) }