cython简明使用

Cython目标

Cython主要是为了解决python性能较差的问题而诞生的,尤其针对cpu上的矩阵运算、循环运算等计算密集型场景。

Cython将你的类python代码编译成c链接库,你可以自己定义哪些运算可以交给c代码去运行。如果你想使用Cython,代表你已经开始关注计算性能了,榨取性能不只是交给C来处理就应该满足的,你可能还需要关注python语句里可优化的代码。

安装

1
$ pip3 install Cython

配置Pycharm

Preferences -> Tools -> External Tools新添加一个用户配置

1
2
3
4
5
# $xxx$均是宏插入,本设置用于将.pyx代码编译为c时所用
Tool Setting:
- Program: /to/path/bin/python3
- Arguments: $FilePath$ build_ext --inplace
- Working directory: $FileDir$

.pxd/.pyd/.pyx区别

  • .pxd

.pxd是扩展模块头文件,类似于C语言的.h头文件,.pxd文件中有 Cython模块要包含的Cython声明。.pxd文件还可为.pyx文件模块提供 Cython接口,以便其它Cython模块可使用比Python更高效的协议与之进行通信。

可用cimport关键字将.pxd文件导入.pyx模块文件中。

  • .pyx

.pyx是扩展模块源代码文件,类似于C语言的.c源代码文件,.pyx文件中有 Cython模块的源代码。

.pyx必须先被编译成.c文件,再编译成.pyd(Windows)或.so(Linux)文件,才可作为模块import导入使用。

  • .pyd

.pyd是由其它编程语言“编写-编译”生成的Python扩展模块。Python要导入.pyd文件,把它当成module来用就可以了:“import ${path}.modulename”。

示例一. 使用c代码

1
2
3
4
5
+-- cfib.c
+-- cfib.h
+-- fib.pyx
+-- setup.py
+-- main.py
1
2
3
4
5
//cfib.h
/*
 * Declaration of the C Fibonacci routine implemented in cfib.c.
 *
 * The include guard is spelled CFIB_H rather than __CFIB_H__: identifiers
 * that begin with two underscores are reserved for the C implementation.
 */
#ifndef CFIB_H
#define CFIB_H

/* Return the n-th Fibonacci number (fib(0) == 0, fib(1) == 1). */
unsigned long fib(unsigned long n);

#endif /* CFIB_H */
1
2
3
4
5
6
7
8
9
//cfib.c
#include "cfib.h"

/*
 * Return the n-th Fibonacci number, computed iteratively.
 *
 * fib(0) == 0 and fib(1) == 1. All arithmetic is done in unsigned long, so
 * the result wraps around once it no longer fits in that type.
 */
unsigned long fib(unsigned long n)
{
    unsigned long curr = 0;
    unsigned long prev = 1;

    /* Advance the (curr, prev) pair exactly n times. */
    while (n-- > 0) {
        unsigned long next = curr + prev;
        prev = curr;
        curr = next;
    }
    return curr;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# fib.pyx
# Declare the C function `fib` from cfib.h. It is exposed inside this module
# under the name `_fib`; the quoted "fib" is the C-level name to call.
cdef extern from "cfib.h":
    unsigned long _fib "fib"(unsigned long n)

def fib_c(n):
    """Return the nth Fibonacci number.

    Thin Python-callable wrapper: the computation itself is delegated to
    ``_fib``, the external C function declared from ``cfib.h``.
    """
    return _fib(n)

def fib_cython(n):
    """Return the nth Fibonacci number.

    Untyped variant: the body is plain Python with no C type declarations,
    so ``a`` and ``b`` are ordinary Python integers.
    """
    a, b = 0, 1
    for _ in range(n):
        a, b = a + b, a
    return a

def fib_cython_optimized(unsigned long n):
    """Return the nth Fibonacci number.

    Typed variant: the argument, both accumulators and the loop counter are
    declared as C ``unsigned long``, so the loop works on C integers rather
    than Python objects.
    """
    cdef unsigned long a = 0, b = 1, i
    for i in range(n):
        a, b = a + b, a
    return a
1
2
3
4
5
# setup.py
# Build script: compiles fib.pyx together with cfib.c into one extension
# module named "fib".
# NOTE: distutils was removed from the standard library in Python 3.12
# (PEP 632); on newer interpreters import `setup` and `Extension` from
# setuptools instead.
from distutils.core import setup, Extension
from Cython.Build import cythonize

setup(ext_modules=cythonize(Extension(name="fib", sources=["cfib.c", "fib.pyx"])))

右键setup.py,External Tools -> accountname编译

性能测试:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
def fib_python(n):
    """Return the nth Fibonacci number.

    Pure-Python baseline used for the timing comparison; ``fib_python(0)``
    is 0 and ``fib_python(1)`` is 1.
    """
    a, b = 0, 1
    for _ in range(n):
        a, b = a + b, a
    return a

if __name__ == '__main__':
    import timeit

    # Setup statements that timeit executes once before timing each snippet:
    # the pure-Python function comes from this script, the other three
    # variants from the compiled `fib` extension module.
    python_setup = "from __main__ import fib_python"
    cython_setup = "import fib"

    # Each call uses timeit's default repetition count.
    print("Python code: ", timeit.timeit('fib_python(47)', setup=python_setup), "seconds")
    print("Cython code: ", timeit.timeit('fib.fib_cython(47)', setup=cython_setup), "seconds")
    print("Optimized Cython code: ", timeit.timeit('fib.fib_cython_optimized(47)', setup=cython_setup), "seconds")
    print("C code: ", timeit.timeit('fib.fib_c(47)', setup=cython_setup), "seconds")

##结果:
Python code: 2.9352622053858113 seconds
Cython code: 1.7331176511158422 seconds
Optimized Cython code: 0.14643933094340067 seconds
C code: 0.11884286952119272 seconds

示例二. 径向基函数的近似计算

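  • 公式:径向基函数(RBF)网络,对每个样本 $x_i$ 计算 $Y_i = \sum_{j=1}^{N} \beta_j \exp\left(-\left(\theta\,\lVert x_j - x_i \rVert_2\right)^2\right)$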
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# fastloop.pyx
# distutils: language = c++
# cython: language_level = 3
# cython: cdivision = True
# cython: boundscheck = False
# cython: wraparound = False
# cython: profile = False
# cython: nonecheck = False
# 或者(把上面的各条cython指令合并写成一行):
# cython: language_level=3, boundscheck=False, wraparound=False, nonecheck=False
from libc.math cimport exp
def rbf_network(double[:, :] X, double[:] beta, double theta):
    """Evaluate a Gaussian radial-basis-function sum at every row of X.

    For each row i the result is
    ``Y[i] = sum_j beta[j] * exp(-(theta * ||X[j] - X[i]||) ** 2)``,
    where ``||.||`` is the Euclidean distance between two rows.

    Args:
        X: 2-D memoryview of doubles; rows are points, columns coordinates.
        beta: 1-D memoryview of doubles, one weight per row of X.
        theta: Scale factor applied to the distance before squaring.

    Returns:
        A 1-D memoryview of doubles with one entry per row of X.
    """
    from numpy import zeros

    cdef int N = X.shape[0]
    cdef int D = X.shape[1]
    cdef double[:] Y = zeros(N)
    cdef int i, j, d
    cdef double r = 0
    for i in range(N):
        for j in range(N):
            # Squared Euclidean distance between rows j and i.
            r = 0
            for d in range(D):
                r += (X[j, d] - X[i, d]) ** 2
            r = r ** 0.5
            Y[i] += beta[j] * exp(-(r * theta) ** 2)

    return Y
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# setup.py
# Build script for the fastloop extension (fastloop.pyx).
from distutils.core import setup, Extension
from Cython.Build import cythonize
from Cython.Distutils import build_ext
import os

# Force a specific GCC toolchain for the build; the flags below include
# -fopenmp. Adjust the names to the compilers installed on your machine.
os.environ["CC"] = "gcc-8"
os.environ["CXX"] = "g++-8"

# Alternative kept from the fib example (builds via cythonize instead):
# setup(
# ext_modules = cythonize(Extension(name="fib", sources=["cfib.c", "fib.pyx"]),
# language_level=3
# )
# )

ext_modules = [
    Extension(
        name="fastloop",
        sources=["fastloop.pyx"],
        libraries=["m"],  # link against the C math library
        language='c',
        extra_compile_args=['-O3', '-ffast-math', '-fopenmp'],
        extra_link_args=['-fopenmp'],
    )
]

setup(
    name='fastloop',
    cmdclass={"build_ext": build_ext},
    ext_modules=ext_modules,
)

性能测试:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from math import exp
import numpy as np

# timeit setup code for the compiled version: imports rbf_network from the
# fastloop extension module and builds random inputs (N = 1000 points with
# D = 5 coordinates each, one random weight per point, theta = 10).
setup = '''
import numpy as np;
from fastloop import rbf_network;
D = 5;
N = 1000;
X = np.array([np.random.rand(D) for d in range(N)]);
beta = np.random.rand(N);
theta = 10;'''

# timeit setup code for the pure-Python version: same input construction,
# but rbf_network is taken from this script (__main__) instead of fastloop.
setup1 = '''
from __main__ import rbf_network;
import numpy as np;
from math import exp;
D = 5;
N = 1000;
X = np.array([np.random.rand(D) for d in range(N)]);
beta = np.random.rand(N);
theta = 10;'''

def rbf_network(X, beta, theta):
    """Evaluate a Gaussian radial-basis-function sum at every row of X.

    Pure-Python reference implementation. For each row i the result is
    ``Y[i] = sum_j beta[j] * exp(-(theta * ||X[j] - X[i]||) ** 2)``,
    where ``||.||`` is the Euclidean distance between two rows.

    Args:
        X: 2-D array; rows are points, columns are coordinates.
        beta: 1-D sequence of weights, one per row of X.
        theta: Scale factor applied to the distance before squaring.

    Returns:
        A 1-D NumPy array with one entry per row of X.
    """
    N = X.shape[0]
    D = X.shape[1]
    Y = np.zeros(N)

    for i in range(N):
        for j in range(N):
            # Squared Euclidean distance between rows j and i.
            r = 0
            for d in range(D):
                r += (X[j, d] - X[i, d]) ** 2
            r = r ** 0.5
            Y[i] += beta[j] * exp(-(r * theta) ** 2)

    return Y

if __name__ == '__main__':
    import timeit

    # Time one call of each implementation. `setup` imports the compiled
    # fastloop version, `setup1` the pure-Python function from this script.
    print(timeit.timeit(stmt='rbf_network(X, beta, theta)',
                        setup=setup,
                        number=1))
    print(timeit.timeit(stmt='rbf_network(X, beta, theta)',
                        setup=setup1,
                        number=1))
1
2
3
##结果:
0.018067925004288554
4.64309394301381

cython怎样找到性能优化点

1
$ cython xxxxx.pyx -a

生成的html中,黄色的行表示该行代码需要与Python解释器交互(颜色越深交互越多),这些行就是可以做性能优化的点。

如上例二fastloop.pyx,使用

1
from libc.math cimport exp

代替

1
from math import exp

其他

timeit

1
timeit.timeit(stmt='pass', setup='pass', timer=<default timer>, number=1000000, globals=None)

python调用c

使用ctypes

1
2
3
4
5
from ctypes import cdll,c_char_p,c_int,c_double
from ctypes.util import find_library

# Locate the C runtime by its short name so the snippet is not tied to one
# platform's file name; fall back to the macOS name used originally.
# The library must be on the loader's search path; other libraries such as
# libm.dylib can be loaded the same way.
cdll_names = find_library('c') or 'libc.dylib'
clib = cdll.LoadLibrary(cdll_names)

# printf is variadic. ctypes requires the fixed (non-variadic) parameters to
# be declared through argtypes on platforms such as ARM64 macOS; the extra
# arguments are still passed as explicit ctypes values.
clib.printf.argtypes = [c_char_p]
clib.printf.restype = c_int
clib.printf(c_char_p("Hello %d %f".encode('utf-8')),c_int(15),c_double(2.3))

python代码优化

  • 减少公有变量,能设为私有的就设为私有
  • 尽量使用函数,减少全局声明和定义
  • 尽可能去掉属性访问,减少.的使用次数,尤其是循环中
  • 更多使用局部变量,尽量少的全局变量
  • 避免不必要的抽象,可以直接访问,不要用get/set方法
  • 使用内置的数据类型,自己封装的类更慢
  • 避免创建不必要的数据结构或复制

参考文献