Created 2025/01/04 at 01:22AM

Last Modified 2025/01/13 at 09:20AM

Ever wish you could write your performance-critical code in a single performant language, yet still tap into that functionality from other languages like Python, Go etc? Shared Object files (.so files) are your secret weapon for bridging language boundaries. In this post, we'll explore how .so files hold the key to language-agnostic bliss.

What are Shared Object files?

A shared object file (.so file on Linux) is a binary file that contains compiled code that multiple programs or languages can dynamically link to at runtime. Shared libraries are commonly used to encapsulate functionality and promote code reuse across applications.

Core Concepts

Common Challenges

Does Using a .so File Speed Up Processing?

Yes! Using a .so file can speed up processing, but the extent of the speed improvement depends on several factors:

Does the Speed Depend on the Source Language?

Yes! The source language can affect the execution speed of the .so file, depending on:

Remember, we are discussing about computationally expensive tasks. For I/O-bound tasks (e.g., reading/writing files, making network requests), the speed of .so files from different languages may not matter because: - The bottleneck is not CPU-bound but dependent on external systems. - Overhead from Python interfacing may dominate execution time.


Example

Let's try to implement BFS to find length of shortest path between a source and a destination, in both C++ and Cython, compile them to .so files and we'll use these .so files with Python and Golang.

C++ implementation

// bfs.cpp
#include <bits/stdc++.h>

extern "C" {
    int find_shortest_path(int n, int edgesCount, int* edges, int source, int destination) {
        std::vector<std::vector<int>> g(n);

        for(int i = 0; i < edgesCount; ++i) {
            int u = edges[2*i];
            int v = edges[2*i + 1];
            g[u].emplace_back(v);
            g[v].emplace_back(u);
        }

        int *dist = new int[n];
        std::memset(dist, -1, sizeof(int) * n);
        std::deque<int> q;

        dist[source] = 0;
        q.push_back(source);

        while (!q.empty()) {
            int u = q.front();
            q.pop_front();

            if(u == destination) {
                int ret = dist[u];
                delete[] dist;
                return ret;
            }

            for(int neighbor : g[u]) {
                if(dist[neighbor] == -1) {
                    dist[neighbor] = dist[u] + 1;
                    q.push_back(neighbor);
                }
            }
        }

        delete[] dist;
        return -1;
    }
}

We compile this using g++ -O3 -shared -fPIC bfs.cpp -o libbfs.so

Cython implementation

We disabled bounds checking and wraparound with the directives at the top (# cython: boundscheck=False, etc.) for performance.

# bfs.pyx
# cython: boundscheck=False
# cython: wraparound=False

def _helper_bfs(n, edges_count, edges, source, destination):
    from collections import deque

    adjacency = [[] for _ in range(n)]
    for i in range(edges_count):
        u = edges[2*i]
        v = edges[2*i + 1]
        adjacency[u].append(v)
        adjacency[v].append(u)

    dist = [-1] * n
    dist[source] = 0

    q = deque()
    q.append(source)

    while q:
        current = q.popleft()
        if current == destination:
            return dist[current]

        for neighbor in adjacency[current]:
            if dist[neighbor] == -1:
                dist[neighbor] = dist[current] + 1
                q.append(neighbor)

    return -1

def find_shortest_path(int n, int edges_count, int[:] edges, int source, int destination) -> int:
    cdef list py_edges = [0]*(2 * edges_count)
    cdef int i
    for i in range(2 * edges_count):
        py_edges[i] = edges[i]

    return _helper_bfs(n, edges_count, py_edges, source, destination)

To compile bfs.pyx file, we also need to create a setup.py.

# setup.py
from setuptools import setup, Extension
from Cython.Build import cythonize

extensions = [
    Extension(
        name="bfs",
        sources=["bfs.pyx"],
    )
]

setup(
    name="bfs",
    ext_modules=cythonize(extensions, language_level="3"),
)

And, now to get .so file from it, we run python setup.py build_ext --inplace. This gives us a .so file named bfs.cpython-38-x86_64-linux-gnu.so.


Inspecting the .so files

Comparing size

$ du -sh libbfs.so 
20K libbfs.so

$ du -sh bfs.cpython-38-x86_64-linux-gnu.so 
1.3M    bfs.cpython-38-x86_64-linux-gnu.so

Lets use objdump to see the symbols in each .so file.

$ objdump -T libbfs.so | wc -l
31

$ objdump -T bfs.cpython-38-x86_64-linux-gnu.so | wc -l 
162

The one compiled from C++ has fewer symbols because it mostly has just our BFS function and standard library calls, while the one compiled from cython includes Python/C API symbols or references to them as well.


We are going to use following egdes as setup for all experiments and number of nodes as 300.

Edges
edges = [(0, 11), (0, 177), (0, 194), (0, 288), (1, 8), (1, 68), (1, 178), (1, 243), (1, 251), (2, 202), (3, 76), (3, 114), (3, 135), (3, 139), (4, 80), (4, 154), (4, 176), (4, 187), (4, 235), (5, 88), (5, 94), (5, 96), (5, 99), (5, 123), (5, 151), (5, 241), (5, 290), (6, 100), (6, 105), (6, 263), (7, 33), (7, 52), (7, 62), (7, 74), (7, 178), (7, 199), (7, 276), (8, 29), (8, 166), (8, 177), (8, 187), (9, 250), (10, 233), (10, 237), (11, 137), (11, 143), (11, 194), (12, 146), (12, 243), (12, 245), (13, 24), (13, 99), (13, 238), (13, 255), (14, 24), (14, 46), (14, 57), (14, 70), (14, 82), (14, 147), (14, 169), (14, 189), (14, 268), (15, 66), (15, 149), (15, 193), (15, 247), (16, 165), (16, 216), (16, 226), (16, 227), (16, 253), (17, 90), (17, 182), (18, 201), (18, 235), (18, 289), (19, 248), (20, 103), (20, 109), (20, 263), (21, 113), (21, 138), (21, 170), (21, 171), (21, 265), (22, 79), (22, 105), (22, 211), (22, 272), (23, 78), (23, 200), (23, 226), (24, 27), (24, 121), (24, 143), (24, 268), (25, 66), (25, 95), (25, 154), (25, 169), (25, 201), (26, 156), (27, 140), (27, 264), (27, 276), (27, 295), (28, 63), (28, 123), (29, 51), (29, 153), (29, 283), (29, 289), (30, 39), (30, 72), (30, 73), (31, 233), (32, 67), (32, 93), (32, 287), (33, 140), (33, 141), (33, 209), (33, 227), (33, 264), (33, 289), (34, 66), (34, 81), (34, 113), (34, 156), (34, 182), (34, 260), (34, 297), (35, 75), (35, 123), (35, 176), (35, 222), (36, 61), (36, 118), (36, 119), (36, 121), (36, 130), (36, 170), (36, 218), (37, 153), (37, 181), (37, 234), (38, 181), (39, 87), (39, 105), (39, 106), (40, 278), (41, 55), (41, 211), (41, 271), (41, 298), (42, 217), (42, 223), (42, 239), (43, 52), (43, 60), (43, 139), (43, 251), (43, 290), (44, 48), (44, 99), (44, 112), (44, 164), (44, 211), (45, 98), (45, 196), (46, 102), (46, 111), (46, 180), (46, 248), (46, 294), (47, 122), (47, 189), (48, 71), (48, 132), (48, 230), (49, 234), (49, 291), (50, 57), (50, 73), (51, 62), (51, 155), (51, 196), (52, 120), (52, 180), (53, 195), (53, 216), (53, 226), (53, 288), (54, 87), (54, 185), (54, 186), (55, 76), (55, 180), (56, 63), (56, 90), (56, 277), (57, 161), (58, 96), (58, 124), (58, 130), (58, 206), (58, 207), (58, 223), (58, 252), (59, 90), (59, 138), (59, 184), (60, 82), (61, 127), (61, 285), (62, 123), (62, 141), (62, 163), (62, 196), (62, 296), (63, 113), (63, 164), (64, 89), (64, 131), (65, 135), (65, 159), (65, 236), (65, 253), (65, 275), (65, 289), (66, 144), (66, 164), (66, 282), (66, 284), (67, 74), (67, 136), (67, 208), (67, 285), (67, 288), (68, 82), (68, 140), (69, 91), (69, 111), (69, 113), (69, 175), (69, 178), (70, 159), (70, 224), (70, 237), (70, 294), (71, 81), (71, 160), (72, 158), (72, 172), (72, 196), (72, 230), (73, 279), (74, 108), (74, 126), (74, 265), (75, 86), (75, 202), (76, 101), (76, 169), (76, 294), (77, 126), (77, 137), (77, 148), (77, 151), (77, 186), (77, 264), (78, 115), (78, 265), (79, 92), (79, 167), (79, 194), (80, 169), (80, 173), (80, 220), (80, 243), (80, 277), (81, 198), (81, 221), (82, 112), (82, 194), (82, 205), (82, 219), (82, 234), (82, 242), (82, 259), (82, 264), (82, 292), (83, 93), (83, 202), (83, 210), (83, 275), (84, 216), (85, 183), (85, 226), (85, 228), (86, 165), (86, 268), (87, 144), (88, 99), (88, 242), (88, 293), (89, 96), (89, 173), (89, 234), (89, 244), (89, 254), (90, 124), (90, 130), (90, 189), (90, 252), (91, 269), (92, 98), (92, 128), (92, 234), (92, 265), (93, 171), (93, 190), (93, 275), (94, 119), (94, 148), (94, 203), (94, 206), (94, 274), (95, 211), (96, 144), (96, 207), (97, 124), (97, 157), (97, 229), (97, 249), (98, 117), (98, 124), (98, 126), (98, 172), (98, 257), (98, 265), (99, 123), (99, 147), (99, 179), (100, 106), (100, 138), (100, 170), (100, 221), (100, 223), (101, 112), (101, 170), (101, 280), (101, 291), (102, 208), (102, 271), (103, 128), (103, 173), (103, 268), (104, 299), (105, 114), (105, 274), (106, 203), (107, 165), (108, 215), (108, 262), (109, 146), (110, 298), (113, 291), (114, 142), (114, 143), (114, 282), (115, 222), (115, 258), (115, 264), (116, 217), (116, 293), (117, 192), (118, 169), (118, 263), (118, 269), (119, 170), (119, 188), (119, 220), (119, 242), (119, 279), (120, 224), (120, 229), (120, 287), (121, 180), (121, 186), (121, 270), (121, 277), (122, 127), (122, 134), (122, 137), (123, 169), (123, 186), (123, 227), (123, 242), (123, 293), (124, 286), (125, 185), (126, 186), (126, 188), (126, 243), (126, 265), (127, 181), (127, 187), (127, 213), (128, 164), (129, 253), (130, 189), (130, 210), (131, 170), (131, 285), (132, 224), (132, 296), (133, 171), (133, 255), (135, 206), (137, 165), (137, 176), (137, 271), (137, 299), (138, 204), (138, 220), (138, 235), (139, 170), (139, 188), (140, 196), (140, 284), (141, 169), (141, 267), (142, 170), (142, 236), (143, 150), (144, 257), (144, 299), (145, 161), (145, 191), (145, 222), (145, 268), (145, 291), (146, 179), (146, 201), (147, 255), (148, 252), (148, 261), (148, 270), (149, 163), (149, 294), (150, 211), (150, 247), (150, 266), (152, 171), (152, 191), (152, 284), (153, 155), (155, 225), (155, 291), (156, 276), (157, 220), (157, 234), (157, 236), (157, 298), (158, 235), (159, 207), (159, 295), (160, 190), (160, 203), (160, 206), (160, 253), (161, 228), (161, 268), (161, 272), (162, 174), (163, 216), (163, 238), (164, 294), (165, 181), (166, 170), (167, 172), (167, 237), (168, 269), (168, 273), (169, 184), (169, 222), (169, 269), (170, 232), (170, 255), (170, 263), (172, 206), (172, 233), (172, 291), (174, 185), (174, 186), (174, 192), (174, 207), (174, 214), (174, 255), (175, 244), (175, 288), (177, 235), (177, 278), (178, 264), (179, 189), (180, 190), (180, 227), (180, 234), (182, 186), (182, 258), (184, 203), (187, 206), (187, 291), (188, 248), (188, 264), (188, 273), (188, 285), (189, 190), (189, 201), (190, 293), (194, 265), (195, 207), (195, 260), (198, 272), (199, 210), (199, 293), (200, 264), (201, 241), (202, 266), (202, 272), (202, 275), (203, 209), (204, 214), (205, 265), (208, 222), (208, 240), (208, 258), (209, 242), (209, 247), (209, 279), (210, 224), (210, 282), (210, 290), (213, 216), (213, 249), (213, 251), (213, 288), (214, 230), (214, 234), (215, 247), (215, 269), (216, 242), (217, 235), (218, 262), (219, 256), (219, 263), (220, 254), (221, 275), (221, 293), (224, 235), (225, 239), (225, 292), (227, 292), (227, 297), (228, 254), (228, 276), (229, 278), (229, 286), (230, 259), (230, 287), (233, 289), (236, 244), (237, 254), (237, 297), (238, 267), (239, 277), (241, 281), (243, 281), (243, 293), (243, 296), (249, 260), (250, 289), (250, 293), (250, 297), (252, 268), (254, 292), (261, 276), (261, 278), (262, 278), (262, 289), (263, 278), (263, 279), (263, 296), (272, 286), (273, 278), (282, 297), (284, 298), (294, 298)]

Using libbfs.so In Python

# bfs_from_cpp_so.py
import ctypes

libbfs = ctypes.CDLL('./libbfs.so')

libbfs.find_shortest_path.argtypes = [
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int),
    ctypes.c_int,
    ctypes.c_int
]
libbfs.find_shortest_path.restype = ctypes.c_int

def get_shortest_path_length(n, edges, source, destination):
    edges_flat = []
    for (u, v) in edges:
        edges_flat.append(u)
        edges_flat.append(v)

    edges_array = (ctypes.c_int * len(edges_flat))(*edges_flat)

    return libbfs.find_shortest_path(n, len(edges), edges_array, source, destination)

if __name__ == "__main__":
    n = 300
    edges = ...

    max_bfs_len = -1
    max_bfs_src, max_bfs_dst = None, None
    for source in range(n):
        for destination in range(n):
            if source == destination:
                continue
            distance = get_shortest_path_length(n, edges, source, destination)
            if distance == -1:
                continue
            if distance > max_bfs_len:
                max_bfs_len = distance
                max_bfs_src = source
                max_bfs_dst = destination

    if max_bfs_len > -1:
        print(f"longest 'shortest distance' is from {max_bfs_src} to {max_bfs_dst}:", max_bfs_len)
$ /bin/time -v python bfs_from_cpp_so.py
longest 'shortest distance' is from 2 to 19: 8
    User time (seconds): 9.84
    ...

Using bfs.cpython-38-x86_64-linux-gnu.so In Python

# bfs_from_pyx_so.py
import ctypes
import bfs


def get_shortest_path_length(n, edges, source, destination):
    edges_flat = []
    for (u, v) in edges:
        edges_flat.append(u)
        edges_flat.append(v)

    edges_array = (ctypes.c_int * len(edges_flat))(*edges_flat)

    return bfs.find_shortest_path(n, len(edges), edges_array, source, destination)

if __name__ == "__main__":
    n = 300
    edges = ...

    max_bfs_len = -1
    max_bfs_src, max_bfs_dst = None, None
    for source in range(n):
        for destination in range(n):
            if source == destination:
                continue
            distance = get_shortest_path_length(n, edges, source, destination)
            if distance == -1:
                continue
            if distance > max_bfs_len:
                max_bfs_len = distance
                max_bfs_src = source
                max_bfs_dst = destination

    if max_bfs_len > -1:
        print(f"longest 'shortest distance' is from {max_bfs_src} to {max_bfs_dst}:", max_bfs_len)
$ /bin/time -v python bfs_from_pyx_so.py
longest 'shortest distance' is from 2 to 19: 8
    User time (seconds): 13.79
    ...

Using libbfs.so In Golang

// main.go
package main

/*
#cgo LDFLAGS: -L. -lbfs

// Forward-declare the BFS function
int find_shortest_path(int n, int edgesCount, int* edges, int source, int destination);
*/
import "C"
import (
    "fmt"
    "unsafe"
)

type edge struct {
    u int
    v int
}

func main() {
    edges := []edge{...}
    flattenedEdges := []int{}
    for _, e := range edges {
        flattenedEdges = append(flattenedEdges, e.u)
        flattenedEdges = append(flattenedEdges, e.v)
    }
    n := 300
    edgesCount := len(edges)

    cEdges := make([]C.int, len(flattenedEdges))
    for i, v := range flattenedEdges {
        cEdges[i] = C.int(v)
    }

    maxBfsLen := C.int(-1)
    maxBfsSrc := -1
    maxBfsDst := -1
    for source := 0; source < n; source++ {
        for destination := 0; destination < n; destination++ {
            if source == destination {
                continue
            }
            dist := C.find_shortest_path(C.int(n), C.int(edgesCount), (*C.int)(unsafe.Pointer(&cEdges[0])), C.int(source), C.int(destination))
            if dist == -1 {
                continue
            }
            if dist > maxBfsLen {
                maxBfsLen = dist
                maxBfsSrc = source
                maxBfsDst = destination
            }
        }
    }
    fmt.Printf("longest 'shortest distance' is from %d to %d: %d\n", maxBfsSrc, maxBfsDst, maxBfsLen)
}

To build, run export LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH && go build -o main main.go

$ ./main
longest 'shortest distance' is from 2 to 19: 8

Keep In Mind While Using .so Files