%% LESSON 10: Histograms % % *FOCUS QUESTION: How can I understand and compare the distributions of two data sets?* % %% EXAMPLE 1: Load the Daphne Island and Santa Cruz Island beak size data Daphne = load('DaphneIslandBeaks.txt'); SantaCruz = load('SantaCruzIslandBeaks.txt'); %% EXAMPLE 2: Display a histogram of the Daphne Island beak size data nDaphne = length(Daphne); % Find number of Daphne Island finches titleDaphne = ['Daphne Island ground finches (n=' num2str(nDaphne) ')']; figure('Name', titleDaphne); % Create a titled figure window hist(Daphne) % Calculate and plot the histogram xlabel('Beak size in mm'); ylabel('Number of birds'); title(titleDaphne); % Use same title for plot as window %% EXAMPLE 3: Calculate and display a histogram using a bar chart, line graph and stair plot [n, xout] = hist(Daphne); % Calculate histogram but don't display figure hold on bar(xout, n); % Plot histogram using a bar chart plot(xout, n, '-ok') % Plot histogram using a black line graph stairs(xout, n, 'r') % Plot histogram using a red stair plot hold off xlabel('Beak size in mm'); ylabel('Number of birds'); title(titleDaphne); datacursormode on % Turn the data cursor on for exploration %% EXAMPLE 4: Explore the data cursor feature %% EXAMPLE 5: Use different bin sizes for Daphne Island histograms figure subplot(3, 1, 1) hist(Daphne, 10) % Create a plot a 10-bin histogram title(titleDaphne) % Put title over topmost graph legend('10 bins') ylabel('Birds') subplot(3, 1, 2) hist(Daphne, 25) % Create a plot a 25-bin histogram legend('25 bins') ylabel('Birds') subplot(3, 1, 3) hist(Daphne, 100) % Create a plot a 100-bin histogram legend('100 bins') xlabel('Beak size in mm') ylabel('Birds') %% EXAMPLE 6: Compare beak distributions of Daphne and Santa Cruz Islands nSantaCruz = length(SantaCruz); % Find number Santa Cruz Island finches figure subplot(1, 2, 1) hist(Daphne) % Histogram of Daphne Island finches title(['Daphne (n=' num2str(nDaphne) ')']) xlabel('Beak size (mm)') ylabel('Number of birds') % Only use one y label for both axes subplot(1, 2, 2) hist(SantaCruz) % Histogram of Santa Cruz Island finches title(['Santa Cruz (n=' num2str(nSantaCruz) ')']) xlabel('Beak size (mm)') %% EXAMPLE 7: Calculate histograms with explicit bin positions % Calculate bin positions explicitly to encompass range of both data sets minBeak = min([min(Daphne), min(SantaCruz)]); maxBeak = max([max(Daphne), max(SantaCruz)]); xBins = minBeak:0.2:maxBeak; % Bin positions will be 0.2 apart % Calculate the histograms based on these bins [nD, xD] = hist(Daphne, xBins); % Histogram of Daphne Island [nS, xS] = hist(SantaCruz, xBins); % Histogram of Santa Cruz Island %% EXAMPLE 8: Use overlapping stair plots to compare scaled distributions legendString = {['Daphne (n=' num2str(nDaphne) ')'], ... ['Santa Cruz (n=' num2str(nSantaCruz) ')']}; figure hold on stairs(xD, nD/sum(nD), 'k'); % Daphne in black stairs(xS, nS/sum(nS), 'r'); % SC in red xlabel('Beak size in mm'); ylabel('Fraction of birds with this beak size'); legend(legendString, 'Location', 'NorthWest'); title('Scaled comparison of Daphne and Santa Cruz finches') hold off %% EXAMPLE 9: Calculate the cumulative distributions for both data sets cumD = cumsum(nD)/sum(nD); % Cumulative distribution of Daphne Island cumS = cumsum(nS)./sum(nS); % Cumulative distribution of Santa Cruz Island %% EXAMPLE 10: Plot the cumulative distributions for both data sets figure hold on plot(xD, cumD, 'k'); plot(xS, cumS, 'r'); title('Beak size distributions at two islands in the Galápagos') xlabel('Beak size (mm)') ylabel('Fraction of birds with beak size less than x') legend(legendString, 'Location', 'NorthWest'); % Reuse legend from last example hold off %% EXAMPLE 11: Generate "random" numbers from three common probability distributions yNormal = random('norm', 0, 1, [10000, 1]); %normal with zero mean and unit sd yUniform = random('unif', -1, 1, [10000,1]); %uniform in the interval [-1, 1] yExponential = random('exp', 1, [10000, 1]); %exponential with mean 1 %% EXAMPLE 12: Display the histograms of the generated distributions figure subplot(3,1,1) hist(yNormal,50) title('Normal distribution (mean = 0, sd = 1)') subplot(3,1,2) hist(yUniform,50) title('Uniform distribution (on interval [-1, 1])') subplot(3,1,3) hist(yExponential,50) title('Exponential distribution (mean = 1)')