PyTorch 使用了一种高效的内存管理机制，特别是通过 intrusive_ptr（侵入式智能指针）和 Allocator（分配器）来管理 Tensor 的内存。intrusive_ptr 基于引用计数原理，能够自动释放不再被引用的内存，从而避免内存泄漏。Allocator 则负责实际的内存分配和释放操作，通过与 intrusive_ptr 的协同工作，实现了内存的高效利用和回收。 2. 计算图机...
structC10_APIStorageImpl:publicc10::intrusive_ptr_target{public:StorageImpl(use_byte_size_t/*use_byte_size*/,constSymInt&size_bytes,at::Allocator*allocator,boolresizable):StorageImpl(use_byte_size_t(),size_bytes,size_bytes.is_heap_allocated()?allocator->allocate(0):allocator->allocate(size_by...
ProcessGroupNCCL::ProcessGroupNCCL( const c10::intrusive_ptr<Store>& store, int rank, int size, c10::intrusive_ptr<Options> options) : Backend(rank, size), store_(store), options_(options), ncclCommCounter_(0), traceKeyStart_(getTraceStartKey("NCCL", rank)), traceKeyEnd_(getTraceEndKe...
intrusive_ptr_target& operator=(intrusive_ptr_target&& other) noexcept { return *this; } // 拷贝构造 intrusive_ptr_target(const intrusive_ptr_target& other) noexcept : intrusive_ptr_target() {} // 拷贝赋值 intrusive_ptr_target& operator=(const intrusive_ptr_target& other) noexcept { return ...
c10::intrusive_ptr<c10::ivalue::Future> executeSendFunctionAsync( const ContextPtr& autogradContext, const std::shared_ptr<SendRpcBackward>& sendFunction, bool retainGraph); // Number of backward passes currently running for the Distributed Engine. ...
c10::intrusive_ptr<c10::ivalue::Future>DistEngine::executeSendFunctionAsync(constContextPtr&autogradContext,conststd::shared_ptr<SendRpcBackward>&sendFunction,bool retainGraph){// Typically the local autograd engine ensures stream synchronizations between// nodes in the graph. However, for distributed...
(int rank, int size);c10::intrusive_ptr<Work> allgather(std::vector<std::vector<at::Tensor>>& outputTensors,std::vector<at::Tensor>& inputTensors,const AllgatherOptions& opts = AllgatherOptions()) override;c10::intrusive_ptr<Work> allreduce(std::vector<at::Tensor>& tensors,const ...
c10::intrusive_ptr<JitFuture>sendMessageWithAutograd(RpcAgent&agent,constWorkerInfo&dst,torch::distributed::rpc::Message&&wrappedRpcMsg,bool forceGradRecording,constfloat rpcTimeoutSeconds,bool forceDisableProfiling){auto msg=getMessageWithAutograd(// 这里会与上下文交互,构建了 FORWARD_AUTOGRAD_REQdst.id...
std::set<c10::intrusive_ptr<c10d::ProcessGroup::Work>>> currentPendingSends_; ThreadPool threadPool_;// Mapping of request id to FutureInfo struct.std::unordered_map<int64_t, FutureInfo> futures_; }; 2.2.3 TensorPipeAgent TensorPipeAgent 定义在 torch/csrc/distributed/rpc/tensorpipe_agent...
继承自 intrusive_ptr_target，即实现了“侵入式计数”。成员变量：Storage storage_; std::unique_ptr<c10::AutogradMetaInterface> autograd_meta_; SmallVector<int64_t,5> sizes_; SmallVector<int64_t,5> strides_; int64_t storage_offset_; PyObject* pyobj_：一个表示这个 tensor 的 PyObject 的弱引用（weak reference） ...