Skip to content

Instances of subclasses of PyLong_Type created in C extensions use Java object handles, possibly causing segmentation faults #595

@actapia

Description

@actapia

With GraalPy 3.12.8 on Linux, built from source at f466f8d, instances of PyLong_Type subclasses created using the C API are returned from __new__ as Java handles instead of real PyObject pointers. When an extension attempts to access an instance of the PyLong_Type subclass as a PyObject, it tries to dereference the Java handle, possibly causing a segmentation fault.

A real example of code that can expose this bug can be seen in boost::python's enum.cpp. In the enum_base::add_value method, boost::python needs to access the `name` member of the enum_object struct it uses for instances of the enum_type_object, but this fails on GraalPy. Similarly, the example C extension below attempts to access the `member` field of the custom_object struct in its set_member function.

/* custom_pyobject.c */
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <stddef.h>

/*
 * Instance layout for the `custom_object` type: a PyLongObject header
 * followed by one extra object pointer.
 *
 * NOTE(review): PyLongObject is a variable-sized object in CPython; for values
 * needing more digits than fit in the base struct, the digit storage may
 * overlap `member` -- confirm before using this layout outside a reproducer.
 */
typedef struct custom_object {
  PyLongObject base_object; /* base-class part; placed first so the struct can be cast to PyObject* */
  PyObject *member;         /* extra field written by set_member(), read by custom_repr() */
} custom_object;

static PyMemberDef custom_members[] = {
  {"member", Py_T_OBJECT_EX, offsetof(custom_object, member), Py_READONLY, 0},
  {0, 0, 0, 0, 0}
};

/*
 * tp_dealloc for custom_object: drops the reference held in `member` (if any)
 * and then hands the instance to the type's tp_free.
 */
static void custom_dealloc(custom_object* self) {
  PyObject *as_object = (PyObject *)self;

  Py_XDECREF(self->member);

  /* Look up tp_free only after the member has been released, as before. */
  freefunc release = Py_TYPE(as_object)->tp_free;
  release(as_object);
}

/*
 * tp_repr for custom_object.
 *
 * Returns a new str of the form "custom_object(<repr of member>)", or NULL
 * with an exception set if computing the member's repr or building the result
 * fails.
 */
static PyObject* custom_repr(PyObject* self_) {
  custom_object* self = (custom_object*)self_;

  /* PyObject_Repr returns a new reference; hold it so it can be released. */
  PyObject* member_repr = PyObject_Repr(self->member);
  if (member_repr == NULL) {
    return NULL;
  }

  PyObject* result = PyUnicode_FromFormat("custom_object(%S)", member_repr);
  Py_DECREF(member_repr);
  return result;
}

static PyTypeObject custom_type_object = {
  PyObject_HEAD_INIT(NULL)
  "custom_object",                          /* tp_name */
  sizeof(custom_object),                    /* tp_basicsize */
  0,                                        /* tp_itemsize */
  (destructor) custom_dealloc,              /* tp_dealloc */
  0,                                        /* tp_print */
  0,                                        /* tp_getattr */
  0,                                        /* tp_setattr */
  0,                                        /* tp_compare */
  custom_repr,                              /* tp_repr */
  0,                                        /* tp_as_number */
  0,                                        /* tp_as_sequence */
  0,                                        /* tp_as_mapping */
  0,                                        /* tp_hash */
  0,                                        /* tp_call */
  0,                                        /* tp_str */
  0,                                        /* tp_getattro */
  0,                                        /* tp_setattro */
  0,                                        /* tp_as_buffer */
  Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
  0,                                        /* tp_doc */
  0,                                        /* tp_traverse */
  0,                                        /* tp_clear */
  0,                                        /* tp_richcompare */
  0,                                        /* tp_weaklistoffset */
  0,                                        /* tp_iter */
  0,                                        /* tp_iternext */
  0,                                        /* tp_methods */
  custom_members,                           /* tp_members */
  0,                                        /* tp_getset */
  &PyLong_Type,                             /* tp_base */
  0,                                        /* tp_dict */
  0,                                        /* tp_descr_get */
  0,                                        /* tp_descr_set */
  0,                                        /* tp_dictoffset */
  0,                                        /* tp_init */
  0,                                        /* tp_alloc */
  0,                                        /* tp_new */
  0,                                        /* tp_free */
  0,                                        /* tp_is_gc */
  0,                                        /* tp_bases */
  0,                                        /* tp_mro */
  0,                                        /* tp_cache */
  0,                                        /* tp_subclasses */
  0,                                        /* tp_weaklist */
  0                                         /* tp_del */
};

/*
 * Py_mod_exec slot: registers custom_type_object on the freshly created
 * module.
 *
 * Returns 0 on success, or -1 with an exception set if adding the type fails.
 */
int custom_pyobject_mod_exec(PyObject* module) {
  if (PyModule_AddType(module, &custom_type_object) < 0) {
    return -1;
  }
  return 0;
}

static PyModuleDef_Slot custom_pyobject_slots[] = {
  {Py_mod_exec, custom_pyobject_mod_exec},
  {0, NULL}
};

/*
 * set_member(obj, value) -> None
 *
 * Stores `value` in the `member` field of a custom_object instance by writing
 * directly into its C struct.
 *
 * args: a 2-tuple (obj, value); obj must be an instance of custom_object
 *       (or a subclass), otherwise TypeError is raised by argument parsing.
 * Returns None, or NULL with an exception set on bad arguments.
 */
PyObject* set_member(PyObject* self, PyObject* args) {
  PyObject* target;
  PyObject* value;
  /* "O!" rejects objects that are not custom_object before the struct cast. */
  if (!PyArg_ParseTuple(args, "O!O", &custom_type_object, &target, &value)) {
    return NULL;
  }
  custom_object* obj = (custom_object*)target;

  /* Take our own reference to the new value (parsed arguments are borrowed)
   * and release the previous one only after the field has been updated. */
  PyObject* previous = obj->member;
  Py_INCREF(value);
  obj->member = value;
  Py_XDECREF(previous);

  Py_RETURN_NONE;
}

static PyMethodDef custom_pyobject_methods[] = {
  {"set_member", set_member, METH_VARARGS, "Set name."},
  {NULL, NULL, 0, NULL}
};

/*
 * Module definition for "custom_pyobject": no per-module state (m_size 0),
 * with the method table and slot array defined in this file.
 */
static struct PyModuleDef custom_pyobject_module = {
  .m_base    = PyModuleDef_HEAD_INIT,
  .m_name    = "custom_pyobject",
  .m_size    = 0,
  .m_methods = custom_pyobject_methods,
  .m_slots   = custom_pyobject_slots,
};

/*
 * Module init entry point: passes the module definition to PyModuleDef_Init
 * and returns its result.
 */
PyMODINIT_FUNC PyInit_custom_pyobject(void) {
  struct PyModuleDef* definition = &custom_pyobject_module;
  return PyModuleDef_Init(definition);
}

If the code above is compiled to custom_pyobject.so, then the Python code below fails with a segmentation fault on GraalPy:

import custom_pyobject

# Create an instance of the C-defined int subclass, then let the extension
# write into the instance's C struct and print the resulting repr.
instance = custom_pyobject.custom_object(10)
custom_pyobject.set_member(instance, "foo")
print(instance)

With CPython, the code prints custom_object('foo').

The relevant discussion on Slack can be found at https://graalvm.slack.com/archives/CNA7PDH2N/p1768011927297709.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions