Stats: Add self-sponsored hours and contributor counts.

Fixes #124

Also renames some variables/keys to clarify their meaning or make them less verbose. See #198, #200.
This commit is contained in:
Ian Dunn 2022-08-03 14:50:16 -07:00
parent 967718263f
commit 9e4d322b3d
No known key found for this signature in database
GPG key ID: 99B971B50343CBCB
3 changed files with 113 additions and 40 deletions

View file

@ -367,6 +367,8 @@ function get_contributor_user_ids( $contributor_posts ) {
$wpdb->prepare( $query, $usernames ) $wpdb->prepare( $query, $usernames )
); );
$user_ids = array_map( 'absint', $user_ids );
return $user_ids; return $user_ids;
} }

View file

@ -58,47 +58,64 @@ function schedule_cron_jobs() {
/** /**
* Record a snapshot of the current stats, so we can track trends over time. * Record a snapshot of the current stats, so we can track trends over time.
*
* "Self-sponsored" contributors are those that volunteer their time rather than being paid by a company.
*/ */
function record_snapshot() { function record_snapshot() {
$stats = get_snapshot_data(); $stats = get_snapshot_data();
bump_stats_extra( 'five-for-the-future', 'Company-sponsored hours', $stats['confirmed_company_hours'] ); bump_stats_extra( 'five-for-the-future', 'Self-sponsored hours', $stats['self_sponsored_hours'] );
bump_stats_extra( 'five-for-the-future', 'Company-sponsored contributors', $stats['confirmed_company_contributors'] ); bump_stats_extra( 'five-for-the-future', 'Self-sponsored contributors', $stats['self_sponsored_contributors'] );
bump_stats_extra( 'five-for-the-future', 'Companies', $stats['confirmed_companies'] ); bump_stats_extra( 'five-for-the-future', 'Companies', $stats['companies'] );
bump_stats_extra( 'five-for-the-future', 'Company-sponsored hours', $stats['company_sponsored_hours'] );
bump_stats_extra( 'five-for-the-future', 'Company-sponsored contributors', $stats['company_sponsored_contributors'] );
foreach ( $stats['confirmed_team_company_contributors'] as $team => $contributors ) { foreach ( array( 'team_company_sponsored_contributors', 'team_self_sponsored_contributors' ) as $key ) {
foreach ( $stats[ $key ] as $team => $contributors ) {
// The labels are listed alphabetically in MC, so starting them all with "Team" groups them together and // The labels are listed alphabetically in MC, so starting them all with "Team" groups them together and
// makes the interface easier to use. // makes the interface easier to use.
$grouped_name = sprintf( 'Team %s company-sponsored contributors', str_replace( ' Team', '', $team ) ); $grouped_name = sprintf(
'Team %s %s-sponsored contributors',
str_replace( ' Team', '', $team ),
str_contains( $key, 'self' ) ? 'self' : 'company'
);
bump_stats_extra( 'five-for-the-future', $grouped_name, $contributors ); bump_stats_extra( 'five-for-the-future', $grouped_name, $contributors );
} }
} }
}
/** /**
* Calculate the stats for the current snapshot. * Calculate the stats for the current snapshot.
* *
* This will be processing a large amount of data, so `unset()` is used throughout the function on variables that
* are no longer needed. That should help to avoid out-of-memory errors.
*
* @return array * @return array
*/ */
function get_snapshot_data() { function get_snapshot_data() {
$snapshot_data = array( $snapshot_data = array(
'confirmed_company_hours' => 0, 'company_sponsored_hours' => 0,
'confirmed_team_company_contributors' => array(), 'self_sponsored_hours' => 0,
'team_company_sponsored_contributors' => array(),
'team_self_sponsored_contributors' => array(),
); );
$confirmed_companies = new WP_Query( array( $companies = new WP_Query( array(
'post_type' => Pledge\CPT_ID, 'post_type' => Pledge\CPT_ID,
'post_status' => 'publish', 'post_status' => 'publish',
'numberposts' => 1, // We only need `found_posts`, not the posts themselves. 'numberposts' => 1, // We only need `found_posts`, not the posts themselves.
) ); ) );
$snapshot_data['confirmed_companies'] = $confirmed_companies->found_posts; $snapshot_data['companies'] = $companies->found_posts;
unset( $companies );
/* /*
* A potential future optimization would be make WP_Query only return the `post_title`. The `fields` parameter * A potential future optimization would be make WP_Query only return the `post_title`. The `fields` parameter
* doesn't currently support `post_title`, but it may be possible with filters like `posts_fields` * doesn't currently support `post_title`, but it may be possible with filters like `posts_fields`
* or `posts_fields_request`. That was premature at the time this code was written, though. * or `posts_fields_request`. That was premature at the time this code was written, though.
*/ */
$confirmed_company_contributors = get_posts( array( $company_sponsored_contributors = get_posts( array(
'post_type' => Contributor\CPT_ID, 'post_type' => Contributor\CPT_ID,
'post_status' => 'publish', 'post_status' => 'publish',
'numberposts' => -1, 'numberposts' => -1,
@ -112,37 +129,35 @@ function get_snapshot_data() {
* but `WP_Query` doesn't support `DISTINCT` directly, and it's premature at this point. It may be possible * but `WP_Query` doesn't support `DISTINCT` directly, and it's premature at this point. It may be possible
* with the filters mentioned above. * with the filters mentioned above.
*/ */
$confirmed_user_ids = array_unique( Contributor\get_contributor_user_ids( $confirmed_company_contributors ) ); $company_contributor_user_ids = array_unique( Contributor\get_contributor_user_ids( $company_sponsored_contributors ) );
$snapshot_data['confirmed_company_contributors'] = count( $confirmed_user_ids ); unset( $company_sponsored_contributors );
$company_contributors_profile_data = XProfile\get_xprofile_contribution_data( $confirmed_user_ids );
foreach ( $company_contributors_profile_data as $profile_data ) { $all_contributor_profiles = XProfile\get_all_xprofile_contributor_hours_teams();
switch ( (int) $profile_data['field_id'] ) { $snapshot_data['company_sponsored_contributors'] = count( $company_contributor_user_ids );
case XProfile\FIELD_IDS['hours_per_week']: $snapshot_data['self_sponsored_contributors'] = count( $all_contributor_profiles ) - count( $company_contributor_user_ids );
$snapshot_data['confirmed_company_hours'] += absint( $profile_data['value'] );
break;
case XProfile\FIELD_IDS['team_names']: foreach ( $all_contributor_profiles as $profile ) {
/* $attribution_prefix = in_array( $profile->user_id, $company_contributor_user_ids, true )
* BuddyPress validates the team name(s) the user provides before saving them in the database, so ? 'company_sponsored'
* it should be safe to unserialize, and to assume that they're valid. : 'self_sponsored';
*
* The database stores team _names_ rather than _IDs_, though, so if a team is ever renamed, this
* data will be distorted.
*/
$associated_teams = (array) maybe_unserialize( $profile_data['value'] );
foreach ( $associated_teams as $team ) { $team_contributor_key = sprintf( 'team_%s_contributors', $attribution_prefix );
if ( isset( $snapshot_data['confirmed_team_company_contributors'][ $team ] ) ) {
$snapshot_data['confirmed_team_company_contributors'][ $team ]++; $snapshot_data[ $attribution_prefix . '_hours'] += $profile->hours_per_week;
foreach ( $profile->team_names as $team ) {
if ( isset( $snapshot_data[ $team_contributor_key ][ $team ] ) ) {
$snapshot_data[ $team_contributor_key ][ $team ] ++;
} else { } else {
$snapshot_data['confirmed_team_company_contributors'][ $team ] = 1; $snapshot_data[ $team_contributor_key ][ $team ] = 1;
} }
} }
}
unset( $all_contributor_profiles );
break; // Alphabetize so that they appear in a consistent order in the MC interface.
} ksort( $snapshot_data['team_company_sponsored_contributors'] );
} ksort( $snapshot_data['team_self_sponsored_contributors'] );
return $snapshot_data; return $snapshot_data;
} }
@ -150,6 +165,9 @@ function get_snapshot_data() {
/** /**
* Render the shortcode to display stats. * Render the shortcode to display stats.
* *
* @deprecated Stats were originally kept in these posts, but are currently stored in MC. This is kept so that we
* have a historical record.
*
* @return string * @return string
*/ */
function render_shortcode() { function render_shortcode() {

View file

@ -2,7 +2,7 @@
namespace WordPressDotOrg\FiveForTheFuture\XProfile; namespace WordPressDotOrg\FiveForTheFuture\XProfile;
use WordPressDotOrg\FiveForTheFuture\Contributor; use WordPressDotOrg\FiveForTheFuture\Contributor;
use wpdb; use WPDB;
/* /*
* The IDs of the xprofile fields we need. Better to use the numerical IDs than the field labels, * The IDs of the xprofile fields we need. Better to use the numerical IDs than the field labels,
@ -16,13 +16,66 @@ const FIELD_IDS = array(
defined( 'WPINC' ) || die(); defined( 'WPINC' ) || die();
/**
 * Get the xprofile `hours_per_week` and `team_names` for all contributors, regardless of sponsorship status.
 *
 * The "Sponsored" field is not retrieved because it's usually not needed, and including it would significantly
 * hurt performance.
 *
 * Users whose `hours_per_week` is less than 1, or who have no team names, are removed from the results.
 * Because of that, the keys of the returned array are not guaranteed to be contiguous.
 *
 * @global wpdb $wpdb
 *
 * @return object[] One object per remaining user, with `user_id` (int), `hours_per_week` (int), and
 *                  `team_names` (array) properties.
 */
function get_all_xprofile_contributor_hours_teams() : array {
	global $wpdb;

	// This might need a `LIMIT` in the future as more users save values, but it's performant as of August 2022.
	// `LIMIT`ing it would require batch processing, which would add a significant amount of complexity.
	// A better alternative might be to add a cron job to delete rows from `bpmain_bp_xprofile_data` where
	// `hours_per_week` is < 1, or `team_names` is a (serialized) empty array. BuddyPress saves those as
	// values rather than deleting them, and that significantly increases the number of rows returned.
	$users = $wpdb->get_results( $wpdb->prepare( '
		SELECT user_id, GROUP_CONCAT( field_id ) AS field_ids, GROUP_CONCAT( value ) AS field_values
		FROM `bpmain_bp_xprofile_data`
		WHERE field_id IN ( %d, %d )
		GROUP BY user_id',
		FIELD_IDS['hours_per_week'],
		FIELD_IDS['team_names']
	) );

	// `get_results()` is documented as possibly returning `null`, which would violate the declared return type.
	if ( empty( $users ) ) {
		return array();
	}

	$field_names = array_flip( FIELD_IDS );

	// Each row is an object, so it can be modified in place without iterating by reference. That also avoids
	// leaving a dangling reference behind after the loop.
	foreach ( $users as $user_index => $user ) {
		/*
		 * NOTE(review): the concatenated values are split on commas, so a stored value that itself contains a
		 * comma would be split incorrectly. The output of `GROUP_CONCAT()` is also subject to MySQL's
		 * `group_concat_max_len` setting. Confirm that neither can occur with the real data.
		 */
		$fields = explode( ',', $user->field_ids );
		$values = explode( ',', $user->field_values );

		foreach ( $fields as $field_index => $id ) {
			// Skip anything that can't be paired up, rather than triggering undefined index warnings. The
			// defaults applied below cover any property that ends up missing.
			if ( ! isset( $field_names[ $id ], $values[ $field_index ] ) ) {
				continue;
			}

			/*
			 * BuddyPress validates the team name(s) the user provides before saving them in the database, so
			 * it should be safe to unserialize, and to assume that they're valid.
			 *
			 * The database stores team _names_ rather than _IDs_, though, so if a team is ever renamed, this
			 * data will be distorted.
			 */
			$user->{$field_names[ $id ]} = maybe_unserialize( $values[ $field_index ] );
		}

		unset( $user->field_ids, $user->field_values ); // Remove the concatenated data now that it's exploded.

		$user->user_id        = absint( $user->user_id );
		$user->hours_per_week = absint( $user->hours_per_week ?? 0 );

		// The parentheses are required: a cast binds more tightly than `??`, so without them the fallback would
		// never apply, and a user with no `team_names` row would trigger an undefined property warning.
		$user->team_names = (array) ( $user->team_names ?? array() );

		if ( 0 >= $user->hours_per_week || empty( $user->team_names ) ) {
			unset( $users[ $user_index ] );
		}
	}

	return $users;
}
/** /**
* Pull relevant data from profiles.wordpress.org. * Pull relevant data from profiles.wordpress.org.
* *
* Note that this does not unserialize anything, it just pulls the raw values from the database table. If you * Note that this does not unserialize anything, it just pulls the raw values from the database table. If you
* want unserialized data, use `prepare_xprofile_contribution_data()`. * want unserialized data, use `prepare_xprofile_contribution_data()`.
* *
* @global wpdb $wpdb * @global WPDB $wpdb
* *
* @param array $user_ids * @param array $user_ids
* *