Rust Pointer

Table of Contents

1. Rust Pointer

1.1. slice

struct Test(i64, i64);

fn format_of_string() -> () {
    println!("------\nformat of String:");
    let s = "abcde".to_owned();
    println!("size: {:?}", std::mem::size_of_val(&s));

    let pp = &s as *const _ as *const [i64; 3];
    unsafe {
        println!("string addr is {:?}", pp);
        println!(
            "string value is 0x{:x} 0x{:x} 0x{:x}",
            (*pp)[0],
            (*pp)[1],
            (*pp)[2],
        );

        let p_heap = (*pp)[0] as *const [u8; 6];
        println!("heap value is {:?}", *p_heap);
    }
}

fn format_of_str() -> () {
    println!("------\nformat of &str:");
    let orig = "abc".to_owned();


    let s = &orig[1..];
    println!("size: {:?}", std::mem::size_of_val(&s));

    let pp = &s as *const _ as *const [i64; 2];
    let pp_orig = &orig as *const _ as *const [i64; 3];
    unsafe {
        println!("string addr is {:?}", pp_orig);
        println!(
            "string value is 0x{:x} 0x{:x} 0x{:x}",
            (*pp_orig)[0],
            (*pp_orig)[1],
            (*pp_orig)[2],
        );

        println!("str addr is {:?}", pp);
        println!(
            "str value is 0x{:x} 0x{:x}",
            (*pp)[0],
            (*pp)[1],
        );
    }
}

fn main() {
    format_of_string();
    format_of_str();
}
------
format of String:
size: 24
string addr is 0x7ffc65d77828
string value is 0x5 0x5cb326830b80 0x5
  1. String 本身位于栈上, 格式为 `[pointer_to_heap, size, capacity]`, pointer_to_heap 指向堆, 保存 String 真正的内容
  2. &str 格式为 `[pointer_to_heap, size]`, 它直接指向 heap + slice_offset, 而不是指向栈上的 String 对象

所以 &str 以及 &slice 中的 & 操作并非直接取地址, 它们`生成`一个 `fat pointer`,且 pointer 会指向真正的 payload 而不是栈上的 String 或 array.

另外, `fat pointer` 中的 size 用来确保运行时访问不越界

1.2. Box

1.2.1. 内部原理

struct Test(i64, i64);

fn main() {
    let mut p = Box::new(Test(1234, 5678));
    println!("size: {:?}", std::mem::size_of_val(&p));
    let pp = &p as *const _ as *const [i64; 1];
    unsafe {
        println!("box addr is {:?}", pp);
        println!("box value is 0x{:x}", (*pp)[0]);

        let p_heap = (*pp)[0] as *const [i64; 2];
        println!("heap value is {:?}", *p_heap);
    }
}
size: 8
box addr is 0x7ffddf26ba58
box value is 0x56b77c0bcb80
heap value is [1234, 5678]

Box 自身分配在栈上, 它只包含一个指向堆的地址 (String 除了地址还包括 size 和 capacity), payload 实际在堆上.

1.2.2. Box 使用

1.2.2.1. 使用 reference 实现链表
#[derive(Debug)]
struct ListNode<'a> {
    val: i32,
    next: Option<&'a ListNode<'a>>,
}
impl<'a> ListNode<'a> {
    fn new(val: i32) -> (ListNode<'a>) {
        return ListNode {
            val: val,
            next: None,
        };
    }
}
#[derive(Debug)]
struct LinkedList<'a> {
    root: Option<&'a ListNode<'a>>,
}

impl<'a> LinkedList<'a> {
    fn new() -> LinkedList<'a> {
        return LinkedList { root: None };
    }
    fn add_node(&mut self, node: &'a mut ListNode<'a>) -> () {
        node.next = self.root;
        self.root = Some(node);
    }
}
fn main() {
    let mut list = LinkedList::new();

    let mut node1 = ListNode::new(1);
    let mut node2 = ListNode::new(2);
    let mut node3 = ListNode::new(3);

    list.add_node(&mut node1);
    list.add_node(&mut node2);
    list.add_node(&mut node3);

    println!("{:?}", list);
}
LinkedList { root: Some(ListNode { val: 3, next: Some(ListNode { val: 2, next: Some(ListNode { val: 1, next: None }) }) }) }

这种实现的问题是:

  1. 由于 node 栈上分配的原因, struct ListNode 定义时无法指定 next 为 TreeNode, 因为递归定义的结构体无法在编译时确定大小, 只能定义 next 为 reference
  2. 需要指定许多 lifetime annotation 确定 reference 有效
  3. next 只是引用, 所以 parent 并不是 node 的 owner, 需要额外的 owner (node1, node2, node3) 确保 reference 有效
  4. 由于栈上分配的原因, 无法把封装在一个函数中. 因为 shadow copy 时不会 copy reference
1.2.2.2. 使用 Box 实现链表
#[derive(Debug)]
struct ListNode {
    val: i32,
    next: Option<Box<ListNode>>,
}

impl ListNode {
    fn new(val: i32) -> Box<ListNode> {
        Box::new(ListNode {
            val: val,
            next: None,
        })
    }
}

#[derive(Debug)]
struct LinkedList {
    root: Option<Box<ListNode>>,
}

impl LinkedList {
    fn new() -> LinkedList {
        LinkedList { root: None }
    }

    fn push_front(&mut self, mut node: Box<ListNode>) -> () {
        let t = std::mem::replace(&mut self.root, None);
        node.next = t;
        self.root = Some(node);
    }

    fn push_back(&mut self, node: Box<ListNode>) -> () {
        let mut fake_root = ListNode::new(0);
        let t = std::mem::replace(&mut self.root, None);
        fake_root.next = t;
        let mut head = &mut fake_root;
        while let Some(ref mut n) = head.next {
            head = n
        }
        head.next = Some(node);
        self.root = fake_root.next;
    }

    fn pop_front(&mut self) -> () {
        if let Some(ref mut node) = self.root {
            self.root = std::mem::replace(&mut node.next, None);;
        }
    }
}

fn main() {
    let mut list = LinkedList::new();
    list.push_front(ListNode::new(1));
    list.push_back(ListNode::new(2));
    list.pop_front();
    println!("{:?}", list);
}
LinkedList { root: Some(ListNode { val: 2, next: None }) }

使用 Box 无需考虑 owner 的问题, 但还是需要处理用 reference 和 mem::replace 来防止 Box 被 move,

1.2.3. Box 的应用场景

  1. 递归定义的数据结构
  2. 堆上分配
  3. 单一 owner 可以搞定的问题, 例如单向链表 (双向链表和树, 图等可能难以用 Box 处理)

    let node = TreeNode::new(1);
    prev.next = node;
    // node 已经被 move 到 prev.next, 无法再赋值给 next.prev
    next.prev = node;
    

1.3. Rc

rust 编译时只允许单一 owner, 通过 Rc 可以支持双向链表等需要多个 owner 的场景.

Rc 即 reference counting, 多个不同的 Rc 可以在内部 `own` 同一个资源, `运行时` 通过引用计数的方式决定何时释放资源.

在编译时一个 Rc 还是只有一个 owner, 编译器并不知道多个 Rc `own` 同一个资源.

1.3.1. 内部原理

  1. Box 的 clone 会复制所有数据, 包含堆上的数据

    #[derive(Clone)]
    struct Test(i64, i64);
    
    fn main() {
        let p = Box::new(Test(1234, 5678));
        println!("box size: {:?}", std::mem::size_of_val(&p));
        let pp = &p as *const _ as *const [i64; 1];
        unsafe {
            println!("box addr is {:?}", pp);
            println!("box value is 0x{:x}", (*pp)[0]);
    
            let p_heap = (*pp)[0] as *const [i64; 2];
            println!("heap value is {:?}", *p_heap);
        }
    
        let p_cloned = p.clone();
        println!("rc size: {:?}", std::mem::size_of_val(&p_cloned));
        let pp_cloned = &p_cloned as *const _ as *const [i64; 1];
        unsafe {
            println!("cloned box addr is {:?}", pp_cloned);
            println!("cloned box value is 0x{:x}", (*pp_cloned)[0]);
    
            let p_heap = (*pp_cloned)[0] as *const [i64; 2];
            println!("heap value is {:?}", *p_heap);
        }
    }
    
    
    box size: 8
    box addr is 0x7ffe6084c258
    box value is 0x56ac7639db80
    heap value is [1234, 5678]
    rc size: 8
    cloned box addr is 0x7ffe6084c3b8
    cloned box value is 0x56ac7639dba0
    heap value is [1234, 5678]
    
  2. Rc 的 clone 只是 shadow copy

    #[derive(Clone)]
    struct Test(i64, i64);
    
    use std::rc::Rc;
    
    fn main() {
        let p = Rc::new(Test(1234, 5678));
    
        let pp = &p as *const _ as *const [i64; 2];
        unsafe {
            println!("rc addr is {:?}", pp);
            println!("rc value is 0x{:x}", (*pp)[0]);
    
            let p_heap = (*pp)[0] as *const [i64; 4];
            println!("heap value is {:?}", *p_heap);
        }
        println!("strong_count: {:?}", Rc::strong_count(&p));
    
        println!("");
        let p_cloned = p.clone();
        let pp_cloned = &p_cloned as *const _ as *const [i64; 1];
        unsafe {
            println!("cloned box addr is {:?}", pp_cloned);
            println!("cloned box value is 0x{:x}", (*pp_cloned)[0]);
    
            let p_heap = (*pp_cloned)[0] as *const [i64; 4];
            println!("heap value is {:?}", *p_heap);
        }
        println!("strong_count: {:?}", Rc::strong_count(&p));
        println!("{:?}", Rc::into_raw(p));
    }
    
    
    rc addr is 0x7ffc48b8eeb0
    rc value is 0x601c2a5e3ae0
    heap value is [1, 1, 1234, 5678]
    strong_count: 1
    
    cloned box addr is 0x7ffc48b8f040
    cloned box value is 0x601c2a5e3ae0
    heap value is [2, 1, 1234, 5678]
    strong_count: 2
    0x601c2a5e3af0
    

    Rc 不是 fat pointer, 它只包含一个堆上的地址, 真正的数据以及引用计数等放在堆上.

1.3.2. 使用 Rc 实现链表

使用 Rc 实现链表并不比 Box 简单, 主要原因是, 在遍历 list 时, 可以这样:

let mut head = fake_root.clone();
while let Some(n) = head.next.clone() {
    head = n;
}
Rc::get_mut(&mut head).unwrap().next = Some(node);
self.root = fake_root.next.clone();

问题是 Rc 只有在存在一个 owner 时才可以通过 get_mut 来修改, 类似于读写锁的限制, 导致我们在遍历时实际上无法修改, 因为至少有两个 owner 存在.

1.4. RefCell

RefCell 并不是指针类型, 和 owner 机制无关. 它只是一个 wrapper, 使非 mutable 的变量也可以在运行时被修改.

因为 Rc 使得可以有多个 owner, 但这时却难以对数据进行修改, 通过 RefCell, 可以方便的修改数据.

RefCell 通过 borrow() 和 borrow_mut() 实现了运行时的 `读写锁` 而非 Rc 那种编译时

use std::cell::RefCell;
use std::rc::Rc;

#[derive(Debug)]
struct ListNode {
    val: i32,
    next: Option<Rc<RefCell<ListNode>>>,
}
impl ListNode {
    fn new(val: i32) -> (Rc<RefCell<ListNode>>) {
        return Rc::new(RefCell::new(ListNode {
            val: val,
            next: None,
        }));
    }
}
#[derive(Debug)]
struct LinkedList {
    root: Option<Rc<RefCell<ListNode>>>,
}

impl LinkedList {
    fn new() -> LinkedList {
        return LinkedList { root: None };
    }
    fn push_front(&mut self, node: Rc<RefCell<ListNode>>) -> () {
        node.borrow_mut().next = self.root.clone();
        self.root = Some(node);
    }

    fn push_back(&mut self, node: Rc<RefCell<ListNode>>) -> () {
        let mut fake_root = ListNode::new(0);
        fake_root.borrow_mut().next = self.root.clone();

        let mut head = fake_root.clone();
        while let Some(n) = head.clone().borrow().next.clone() {
            head = n;
        }
        head.borrow_mut().next = Some(node);

        self.root = fake_root.borrow().next.clone();
    }
}

fn main() {
    let mut list = LinkedList::new();

    list.push_front(ListNode::new(1));
    list.push_back(ListNode::new(2));

    println!("{:?}", list);
}
LinkedList { root: Some(RefCell { value: ListNode { val: 1, next: Some(RefCell { value: ListNode { val: 2, next: None } }) } }) }

Author: [email protected]
Date: 2018-12-28 Fri 00:00
Last updated: 2024-08-05 Mon 17:58

知识共享许可协议