import numpy as np
# Seed the global NumPy RNG so every implementation below is scored on the
# same synthetic data.
np.random.seed(42)

# Five million binary ground-truth labels (0.0 / 1.0), stored as float32.
labels = np.random.randint(0, 2, size=5_000_000).astype(np.float32)
# Five million uniformly drawn scores, also stored as float32.
preds = np.random.uniform(0, 1, size=5_000_000).astype(np.float32)

def pfbeta(labels, predictions, beta=1):
    """Return the probabilistic F-beta score using a plain Python loop.

    Each prediction is clipped to [0, 1]. Predictions whose label is truthy
    are summed into the "true positive" mass, all others into the "false
    positive" mass. Precision and recall are derived from those sums and
    combined into the F-beta score.

    Args:
        labels: Indexable sequence of ground-truth labels; a truthy value
            marks a positive sample.
        predictions: Indexable sequence of scores, read at the same indices
            as ``labels``.
        beta: Weight of recall relative to precision (``beta=1`` gives F1).

    Returns:
        The score as a float, or ``0.0`` when it is undefined (no samples,
        no positive labels, or no prediction mass) or when precision or
        recall is not strictly positive.
    """
    y_true_count = 0
    # Accumulate as Python floats so the running sums do not inherit a
    # narrower dtype from the input elements.
    ctp = 0.0
    cfp = 0.0

    for idx, label in enumerate(labels):
        prediction = min(max(float(predictions[idx]), 0.0), 1.0)
        if label:
            y_true_count += 1
            ctp += prediction
        else:
            cfp += prediction

    # Guard both denominators: without this, empty input, all-negative
    # labels or all-zero predictions raise ZeroDivisionError.
    predicted_mass = ctp + cfp
    if y_true_count == 0 or predicted_mass == 0:
        return 0.0

    beta_squared = beta * beta
    c_precision = ctp / predicted_mass
    c_recall = ctp / y_true_count
    if c_precision > 0 and c_recall > 0:
        return (
            (1 + beta_squared) * (c_precision * c_recall)
            / (beta_squared * c_precision + c_recall)
        )
    return 0.0

%%time
pfbeta(labels, preds)

CPU times: user 23.2 s, sys: 3.52 ms, total: 23.2 s
Wall time: 23.2 s

0.4999093296483036

def pfbeta_np(labels, preds, beta=1):
    """Return the probabilistic F-beta score using vectorised NumPy ops.

    Args:
        labels: Array-like of ground-truth labels; entries equal to 1 are
            positives and entries equal to 0 are negatives.
        preds: Array-like of scores with the same shape as ``labels``;
            values are clipped to [0, 1].
        beta: Weight of recall relative to precision (``beta=1`` gives F1).

    Returns:
        The score as a Python float, or ``0.0`` when it is undefined (empty
        input, no positive labels, no prediction mass) or when precision or
        recall is not strictly positive.
    """
    # np.asarray lets plain lists/tuples through as well as ndarrays.
    labels = np.asarray(labels)
    preds = np.asarray(preds).clip(0, 1)

    # Sum in float64 so float32 inputs do not lose precision on long arrays.
    y_true_count = labels.sum(dtype=np.float64)
    ctp = preds[labels == 1].sum(dtype=np.float64)
    cfp = preds[labels == 0].sum(dtype=np.float64)

    # Guard the denominators up front instead of dividing by zero and
    # relying on NaN comparisons (which also emits RuntimeWarnings).
    predicted_mass = ctp + cfp
    if not (predicted_mass > 0 and y_true_count > 0):
        return 0.0

    beta_squared = beta * beta
    c_precision = ctp / predicted_mass
    c_recall = ctp / y_true_count
    if not (c_precision > 0 and c_recall > 0):
        return 0.0
    return float(
        (1 + beta_squared) * (c_precision * c_recall)
        / (beta_squared * c_precision + c_recall)
    )

%%time
pfbeta_np(labels, preds)

CPU times: user 107 ms, sys: 12.9 ms, total: 120 ms
Wall time: 119 ms

0.4999094292348718

from numba import njit, jit
# @jit(nopython=True)
@njit
def pfbeta_numba(labels, preds, beta=1):
    """Return the probabilistic F-beta score, compiled with Numba.

    The score is computed in one explicit pass with float64 accumulators.
    This avoids the temporary clipped/masked arrays and limits rounding
    error when summing long float32 inputs.

    Args:
        labels: 1-D array of ground-truth labels; entries equal to 1 are
            positives and entries equal to 0 are negatives.
            NOTE(review): assumes a numeric dtype - confirm for callers
            that pass boolean arrays.
        preds: 1-D array of scores, indexed in step with ``labels``; values
            are clipped to [0, 1].
        beta: Weight of recall relative to precision (``beta=1`` gives F1).

    Returns:
        The score as a float, or ``0.0`` when it is undefined (no positive
        labels or no prediction mass) or when precision or recall is not
        strictly positive.
    """
    y_true_count = 0.0
    ctp = 0.0
    cfp = 0.0
    for idx in range(len(labels)):
        label = labels[idx]
        prediction = min(max(float(preds[idx]), 0.0), 1.0)
        # Mirrors labels.sum(): the recall denominator is the label total.
        y_true_count += label
        if label == 1:
            ctp += prediction
        elif label == 0:
            cfp += prediction

    # Guard the denominators so degenerate inputs yield 0.0 rather than a
    # division by zero.
    predicted_mass = ctp + cfp
    if predicted_mass <= 0.0 or y_true_count <= 0.0:
        return 0.0

    beta_squared = beta * beta
    c_precision = ctp / predicted_mass
    c_recall = ctp / y_true_count
    if c_precision > 0 and c_recall > 0:
        return (1 + beta_squared) * (c_precision * c_recall) / (beta_squared * c_precision + c_recall)
    return 0.0

# run first time to compile
pfbeta_numba(labels, preds)

0.4998940004682342

%%time
pfbeta_numba(labels, preds)

CPU times: user 85.8 ms, sys: 0 ns, total: 85.8 ms
Wall time: 86.3 ms

0.4998940004682342

def pfbeta_tf(labels, preds, beta=1):
    """Return the probabilistic F-beta score as a TensorFlow scalar.

    The computation uses tensor ops only, with no Python branching on tensor
    values, so it always returns a tensor.

    Args:
        labels: Tensor of ground-truth labels; entries equal to 1 are
            positives and entries equal to 0 are negatives.
            NOTE(review): assumes the same floating dtype as ``preds``,
            since the label sum is used directly as a divisor - confirm.
        preds: Tensor of scores with the same shape as ``labels``; values
            are clipped to [0, 1].
        beta: Weight of recall relative to precision (``beta=1`` gives F1).

    Returns:
        Scalar tensor holding the score; 0 when precision or recall is zero
        or a denominator is zero.
    """
    preds = tf.clip_by_value(preds, 0, 1)
    y_true_count = tf.reduce_sum(labels)
    ctp = tf.reduce_sum(preds[labels == 1])
    cfp = tf.reduce_sum(preds[labels == 0])
    beta_squared = beta * beta
    # divide_no_nan yields 0 for a zero denominator, which replaces both the
    # NaN-producing divisions and the Python-level `if` on tensor values.
    c_precision = tf.math.divide_no_nan(ctp, ctp + cfp)
    c_recall = tf.math.divide_no_nan(ctp, y_true_count)
    # Clipped predictions are non-negative, so precision and recall are
    # never negative; when either is 0 the numerator is 0 and so is the
    # result, matching the former explicit `else: return 0.0` branch.
    return tf.math.divide_no_nan(
        (1 + beta_squared) * (c_precision * c_recall),
        beta_squared * c_precision + c_recall,
    )

import tensorflow as tf
# Convert the benchmark arrays to TensorFlow tensors. This rebinds the global
# `labels` / `preds` names, so later cells see tensors rather than NumPy arrays.
labels = tf.convert_to_tensor(labels)
preds = tf.convert_to_tensor(preds)

2022-12-29 15:00:09.520919: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.

%%time
pfbeta_tf(labels, preds)

CPU times: user 181 ms, sys: 56.7 ms, total: 238 ms
Wall time: 210 ms

<tf.Tensor: shape=(), dtype=float32, numpy=0.49990934>

class pFBeta(tf.keras.metrics.Metric):
    """Streaming probabilistic F-beta score accumulated over all updates.

    Three scalar weights hold running totals across ``update_state`` calls:
    the sum of labels (``pos``), the clipped prediction mass on positive
    samples (``ctp``) and the clipped prediction mass on negative samples
    (``cfp``). ``result`` derives precision, recall and the F-beta score
    from those totals.

    Args:
        beta: Weight of recall relative to precision (``beta=1`` gives F1).
        epsilon: Constant added to the precision and recall denominators.
        name: Metric name passed to the Keras base class.
        **kwargs: Forwarded to ``tf.keras.metrics.Metric``.
    """

    def __init__(self, beta=1, epsilon=1e-5, name='pfbeta', **kwargs):
        super().__init__(name=name, **kwargs)
        self.beta = beta
        self.epsilon = epsilon
        self.pos = self.add_weight(name='pos', initializer='zeros')
        self.ctp = self.add_weight(name='ctp', initializer='zeros')
        self.cfp = self.add_weight(name='cfp', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Add one batch to the running totals.

        Args:
            y_true: Labels; entries equal to 1 are positives and entries
                equal to 0 are negatives.
            y_pred: Scores, clipped to [0, 1].
                NOTE(review): assumes the same shape as ``y_true`` because
                it is indexed with masks built from ``y_true`` - confirm.
            sample_weight: Accepted for signature compatibility; not used.
        """
        y_true = tf.cast(y_true, tf.float32)
        # Cast before accumulating so non-float32 predictions can still be
        # added to the float32 weights.
        y_pred = tf.clip_by_value(tf.cast(y_pred, tf.float32), 0, 1)
        pos = tf.reduce_sum(y_true)
        ctp = tf.reduce_sum(y_pred[y_true == 1])
        cfp = tf.reduce_sum(y_pred[y_true == 0])
        self.pos.assign_add(pos)
        self.ctp.assign_add(ctp)
        self.cfp.assign_add(cfp)

    def result(self):
        """Return the F-beta score for everything accumulated so far.

        Returns:
            Scalar tensor; 0 when precision or recall is zero or a
            denominator is zero.
        """
        beta_squared = self.beta * self.beta
        # divide_no_nan returns 0 on a zero denominator, so no NaN
        # intermediate is produced when epsilon == 0 and no Python `and` is
        # applied to tensors.
        c_precision = tf.math.divide_no_nan(
            self.ctp, self.ctp + self.cfp + self.epsilon
        )
        c_recall = tf.math.divide_no_nan(self.ctp, self.pos + self.epsilon)
        return tf.math.divide_no_nan(
            (1 + beta_squared) * (c_precision * c_recall),
            beta_squared * c_precision + c_recall,
        )

    def get_config(self):
        """Return the base metric config extended with ``beta`` and ``epsilon``."""
        config = super().get_config()
        config.update({'beta': self.beta, 'epsilon': self.epsilon})
        return config

# epsilon=0 removes the smoothing constant from both denominators, so this
# metric can be compared directly with pfbeta_tf on the same data.
pfbeta_tf_v2 = pFBeta(beta=1, epsilon=0)

%%time
pfbeta_tf_v2.update_state(labels, preds)
pfbeta_tf_v2.result()

CPU times: user 530 ms, sys: 1.83 ms, total: 532 ms
Wall time: 469 ms

<tf.Tensor: shape=(), dtype=float32, numpy=0.49990934>

# A second update adds to the same pos/ctp/cfp totals, so result() now
# reflects both batches (the original and the +0.05 shifted predictions).
pfbeta_tf_v2.update_state(labels, preds+0.05)
pfbeta_tf_v2.result()

<tf.Tensor: shape=(), dtype=float32, numpy=0.5118099>

pfbeta_tf(tf.concat([labels, labels],0), tf.concat([preds, preds+0.05],0))

<tf.Tensor: shape=(), dtype=float32, numpy=0.51180995>

def pfbeta_torch(labels, preds, beta=1):
    """Return the probabilistic F-beta score for PyTorch tensors.

    Args:
        labels: Tensor of ground-truth labels; entries equal to 1 are
            positives and entries equal to 0 are negatives.
        preds: Tensor of scores with the same shape as ``labels``; values
            are clipped to [0, 1].
        beta: Weight of recall relative to precision (``beta=1`` gives F1).

    Returns:
        A scalar tensor holding the score, or the Python float ``0.0`` when
        precision or recall is not strictly positive.
    """
    clipped = preds.clip(0, 1)
    positive_mask = labels == 1
    negative_mask = labels == 0

    ctp = clipped[positive_mask].sum()
    cfp = clipped[negative_mask].sum()
    y_true_count = labels.sum()

    c_precision = ctp / (ctp + cfp)
    c_recall = ctp / y_true_count

    # A 0/0 division produces NaN, which fails the `> 0` comparison and
    # therefore also lands in this early return.
    if not (c_precision > 0 and c_recall > 0):
        return 0.0

    beta_squared = beta * beta
    weighted_product = (1 + beta_squared) * (c_precision * c_recall)
    weighted_sum = beta_squared * c_precision + c_recall
    return weighted_product / weighted_sum

import torch
# `labels` / `preds` were rebound to TensorFlow tensors earlier, so convert
# via .numpy() before building the torch tensors. The global names are
# rebound once more here.
labels = torch.Tensor(labels.numpy())
preds = torch.Tensor(preds.numpy())

%%time
pfbeta_torch(labels, preds)

CPU times: user 199 ms, sys: 73.1 ms, total: 272 ms
Wall time: 147 ms

tensor(0.4999)

Implementation

Dummy Labels and Predictions

Naive Python (using `for` loop)

Numpy

Numba

TensorFlow

Batchwise

Overall

Torch (Same as `numpy`)

Implementation

Dummy Labels and Predictions

Naive Python (using for loop)

Numpy

Numba

TensorFlow

Batchwise

Overall

Torch (Same as numpy)

Naive Python (using `for` loop)

Torch (Same as `numpy`)