```python
import numpy as np
import pandas as pd
def big_small_analysis(data, threshold_percentile=75):
    """
    Classify numeric values as "Big" or "Small" around a percentile threshold.

    Entries that cannot be converted to numbers, and NaN values, are dropped
    before the threshold is computed, so the returned frames only describe
    the values that survived the numeric conversion.

    Args:
        data: A pandas Series, list, or NumPy array of values.
        threshold_percentile: Percentile of the cleaned data used as the
            cut-off. Values strictly greater than the cut-off are labelled
            "Big"; all other values are labelled "Small". Defaults to 75.

    Returns:
        A ``(results, summary)`` tuple of pandas DataFrames:
            - ``results``: one row per retained value, with the columns
              "Value" and "Category".
            - ``summary``: ``describe()`` statistics of the values, with one
              column per group ("Big" and "Small").
        Returns None (after printing an error message) if the input has an
        unsupported type, is empty, or contains no numeric values.

    Raises:
        ValueError: Propagated from ``np.percentile`` when
            ``threshold_percentile`` lies outside the 0-100 range.
    """
    if not isinstance(data, (pd.Series, list, np.ndarray)):
        print("Error: Input data must be a pandas Series, list, or NumPy array.")
        return None

    # Count elements explicitly: truth-testing a Series always raises, and an
    # ndarray's truth value is either ambiguous or reflects its only element.
    element_count = len(data) if isinstance(data, list) else data.size
    if element_count == 0:
        print("Error: Input data is empty.")
        return None

    values = data if isinstance(data, pd.Series) else pd.Series(data)

    try:
        # errors='coerce' turns unparseable entries into NaN instead of raising.
        values = pd.to_numeric(values, errors='coerce')
    except (TypeError, ValueError):
        print("Error: Input data contains non-numeric values that cannot be converted.")
        return None
    values = values.dropna()

    if values.empty:
        print("Error: Input data is empty after converting to numeric.")
        return None

    threshold = np.percentile(values, threshold_percentile)

    # Strictly greater than the threshold counts as "Big"; ties are "Small".
    is_big = values > threshold
    categories = pd.Series(
        np.where(is_big, "Big", "Small"),
        index=values.index,
        name="Category",
    )
    results = pd.DataFrame({"Value": values, "Category": categories})

    summary = pd.DataFrame({
        "Big": values[is_big].describe(),
        "Small": values[~is_big].describe(),
    })
    return results, summary
# Example usage:
def _unpack_outcome(outcome):
    """Return (results, summary), substituting (None, None) for a None outcome."""
    # big_small_analysis returns None on invalid input; unpacking None
    # directly would raise TypeError, so normalise it to a pair first.
    return (None, None) if outcome is None else outcome


# Mixed list: the NaN and the non-numeric 'a' are dropped by the analysis.
data = [10, 25, 5, 30, 15, 40, 8, 22, 35, 12, np.nan, 'a']
analysis_results, summary_stats = _unpack_outcome(big_small_analysis(data))
if analysis_results is not None:
    print("Analysis Results:")
    print(analysis_results)
    print("\nSummary Statistics:")
    print(summary_stats)

# pandas Series input.
data2 = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
analysis_results2, summary_stats2 = _unpack_outcome(big_small_analysis(data2))
if analysis_results2 is not None:
    print("\nAnalysis Results 2:")
    print(analysis_results2)
    print("\nSummary Statistics 2:")
    print(summary_stats2)

# NumPy array input.
data3 = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
analysis_results3, summary_stats3 = _unpack_outcome(big_small_analysis(data3))
if analysis_results3 is not None:
    print("\nAnalysis Results 3:")
    print(analysis_results3)
    print("\nSummary Statistics 3:")
    print(summary_stats3)

# Empty input: the analysis prints an error and yields no results.
data4 = []
analysis_results4, summary_stats4 = _unpack_outcome(big_small_analysis(data4))

# Only NaN: nothing is left after the numeric clean-up, so no results either.
data5 = [np.nan]
analysis_results5, summary_stats5 = _unpack_outcome(big_small_analysis(data5))
```