Index: src/wp-admin/includes/upgrade.php
===================================================================
--- src/wp-admin/includes/upgrade.php	(revision 31160)
+++ src/wp-admin/includes/upgrade.php	(working copy)
@@ -513,6 +513,9 @@
 	if ( $wp_current_db_version < 29630 )
 		upgrade_400();
 
+	if ( $wp_current_db_version < 30134 )
+		upgrade_420();
+
 	maybe_disable_link_manager();
 
 	maybe_disable_automattic_widgets();
@@ -1400,6 +1403,26 @@
 }
 
 /**
+ * Execute changes made in WordPress 4.2.0.
+ *
+ * When the connection charset is utf8mb4, converts the tables listed in $wpdb->tables
+ * (plus those in $wpdb->global_tables when not running multisite) to utf8mb4.
+ *
+ * @since 4.2.0
+ */
+function upgrade_420() {
+	global $wp_current_db_version, $wpdb;
+	// Nothing to convert when the schema is already at 30134 or the connection is not utf8mb4.
+	if ( $wp_current_db_version >= 30134 || 'utf8mb4' !== $wpdb->charset ) {
+		return;
+	}
+	$table_keys = is_multisite() ? $wpdb->tables : array_merge( $wpdb->tables, $wpdb->global_tables );
+	foreach ( $table_keys as $table_key ) {
+		$wpdb->query( "ALTER TABLE {$wpdb->$table_key} CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci" );
+	}
+}
+
+/**
  * Execute network level changes
  *
  * @since 3.0.0
@@ -1495,6 +1518,15 @@
 			update_site_option( 'illegal_names', $illegal_names );
 		}
 	}
+
+	// 4.2: with a utf8mb4 connection, convert the tables listed in $wpdb->ms_global_tables and $wpdb->global_tables.
+	if ( $wp_current_db_version < 30134 && 'utf8mb4' === $wpdb->charset ) {
+		$network_tables = array_merge( $wpdb->ms_global_tables, $wpdb->global_tables );
+		foreach ( $network_tables as $table_key ) {
+			$table_name = $wpdb->$table_key;
+			$wpdb->query( "ALTER TABLE {$table_name} CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci" );
+		}
+	}
 }
 
 // The functions we use to actually do stuff
Index: src/wp-admin/setup-config.php
===================================================================
--- src/wp-admin/setup-config.php	(revision 31160)
+++ src/wp-admin/setup-config.php	(working copy)
@@ -280,6 +280,11 @@
 			case 'DB_HOST'     :
 				$config_file[ $line_num ] = "define('" . $constant . "'," . $padding . "'" . addcslashes( constant( $constant ), "\\'" ) . "');\r\n";
 				break;
+			case 'DB_CHARSET'  :
+				if ( 'utf8mb4' === $wpdb->charset || ( ! $wpdb->charset && $wpdb->has_cap( 'utf8mb4' ) ) ) {
+					$config_file[ $line_num ] = "define('" . $constant . "'," . $padding . "'utf8mb4');\r\n";
+				}
+				break;
 			case 'AUTH_KEY'         :
 			case 'SECURE_AUTH_KEY'  :
 			case 'LOGGED_IN_KEY'    :
Index: src/wp-includes/version.php
===================================================================
--- src/wp-includes/version.php	(revision 31160)
+++ src/wp-includes/version.php	(working copy)
@@ -11,7 +11,7 @@
  *
  * @global int $wp_db_version
  */
-$wp_db_version = 30133;
+$wp_db_version = 30134;
 
 /**
  * Holds the TinyMCE version
Index: src/wp-includes/wp-db.php
===================================================================
--- src/wp-includes/wp-db.php	(revision 31160)
+++ src/wp-includes/wp-db.php	(working copy)
@@ -624,8 +624,6 @@
 			}
 		}
 
-		$this->init_charset();
-
 		$this->dbuser = $dbuser;
 		$this->dbpassword = $dbpassword;
 		$this->dbname = $dbname;
@@ -727,6 +725,14 @@
 
 		if ( defined( 'DB_CHARSET' ) )
 			$this->charset = DB_CHARSET;
+
+		if ( 'utf8' === $this->charset && $this->has_cap( 'utf8mb4' ) ) {
+			$this->charset = 'utf8mb4';
+		}
+
+		if ( 'utf8mb4' === $this->charset && ( ! $this->collate || stripos( $this->collate, 'utf8_' ) === 0 ) ) {
+			$this->collate = 'utf8mb4_unicode_ci';
+		}
 	}
 
 	/**
@@ -1477,7 +1483,10 @@
 			return false;
 		} elseif ( $this->dbh ) {
 			$this->has_connected = true;
+
+			$this->init_charset();
 			$this->set_charset( $this->dbh );
+
 			$this->ready = true;
 			$this->set_sql_mode();
 			$this->select( $this->dbname, $this->dbh );
@@ -2356,7 +2365,6 @@
 			'gb2312'  => 'EUC-CN',
 			'ujis'    => 'EUC-JP',
 			'utf32'   => 'UTF-32',
-			'utf8mb4' => 'UTF-8',
 		);
 
 		$supported_charsets = array();
@@ -2391,8 +2399,8 @@
 				}
 			}
 
-			// utf8(mb3) can be handled by regex, which is a bunch faster than a DB lookup.
-			if ( 'utf8' === $charset || 'utf8mb3' === $charset ) {
+			// utf8 can be handled by regex, which is a bunch faster than a DB lookup.
+			if ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) {
 				$regex = '/
 					(
 						(?: [\x00-\x7F]                  # single-byte sequences   0xxxxxxx
@@ -2400,8 +2408,17 @@
 						|   \xE0[\xA0-\xBF][\x80-\xBF]   # triple-byte sequences   1110xxxx 10xxxxxx * 2
 						|   [\xE1-\xEC][\x80-\xBF]{2}
 						|   \xED[\x80-\x9F][\x80-\xBF]
-						|   [\xEE-\xEF][\x80-\xBF]{2}
-						){1,50}                          # ...one or more times
+						|   [\xEE-\xEF][\x80-\xBF]{2}';
+
+				if ( 'utf8mb4' === $charset) {
+					$regex .= '
+						|    \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences   11110xxx 10xxxxxx * 3
+						|    [\xF1-\xF3][\x80-\xBF]{3}
+						|    \xF4[\x80-\x8F][\x80-\xBF]{2}
+					';
+				}
+
+				$regex .= '){1,50}                          # ...one or more times
 					)
 					| .                                  # anything else
 					/x';
@@ -2526,6 +2543,23 @@
 			return $charset;
 		}
 
+		// MySQL's utf8 cannot store four-byte sequences: keep them in post_content as numeric entities instead.
+		if ( $this->posts === $table && 'post_content' === $column && 'utf8' === $charset && function_exists( 'mb_convert_encoding' ) ) {
+			// Match well-formed four-byte sequences only; three-byte characters already fit in utf8.
+			$regex = '/(\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})/';
+			if ( preg_match_all( $regex, $value, $matches ) ) {
+				foreach ( array_unique( $matches[1] ) as $emoji ) {
+					// UTF-32BE is a fixed four bytes per character, so its hex dump is the zero-padded code point.
+					$unpacked = unpack( 'H*', mb_convert_encoding( $emoji, 'UTF-32BE', 'UTF-8' ) );
+					if ( isset( $unpacked[1] ) ) {
+						// Strip the leading padding only: trim() would also eat trailing zeros (1f600 => 1f6).
+						$entity = '&#x' . ltrim( $unpacked[1], '0' ) . ';';
+						$value = str_replace( $emoji, $entity, $value );
+					}
+				}
+			}
+		}
+
 		$data = array(
 			$column => array(
 				'value'   => $value,
Index: tests/phpunit/tests/db/charset.php
===================================================================
--- tests/phpunit/tests/db/charset.php	(revision 31160)
+++ tests/phpunit/tests/db/charset.php	(working copy)
@@ -130,11 +130,12 @@
 	}
 
 	/**
-	 * @ ticket 21212
+	 * @ticket 21212
 	 */
 	function test_process_fields_failure() {
 		global $wpdb;
-		$data = array( 'post_content' => "H€llo\xf0\x9f\x98\x88World¢" );
+		// \xf0\xff\xff\xff is invalid in utf8 and utf8mb4
+		$data = array( 'post_content' => "H€llo\xf0\xff\xff\xffWorld¢" );
 		$this->assertFalse( self::$_wpdb->process_fields( $wpdb->posts, $data, null ) );
 	}
 
@@ -436,6 +437,6 @@
 	 */
 	function test_invalid_characters_in_query() {
 		global $wpdb;
-		$this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\x9f\x98\x88bar')" ) );
+		$this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\xff\xff\xffbar')" ) );
 	}
 }
