---
jupytext:
text_representation:
extension: .md
format_name: myst
format_version: 0.13
jupytext_version: 1.11.2
kernelspec:
display_name: Python 3
language: python
name: python3
---
```{code-cell} ipython3
from __future__ import division, print_function
%matplotlib inline
```
# DNA microarray processing
## Data in this example
*Yeast microarrays for genome wide parallel genetic and gene
expression analysis*
Two-color fluorescent scan of a yeast microarray containing 2,479 elements
(ORFs). The center-to-center distance between elements is 345 μm. A probe
mixture consisting of cDNA from yeast extract/peptone (YEP) galactose (green
pseudocolor) and YEP glucose (red pseudocolor) grown yeast cultures was
hybridized to the array. Intensity per element corresponds to ORF expression,
and pseudocolor per element corresponds to relative ORF expression between the
two cultures.
by Deval A. Lashkari, http://www.pnas.org/content/94/24/13057/F1.expansion
Learn more about microarrays:
- [Tutorial on how to analyze microarray data](http://www.hhmi.org/biointeractive/how-analyze-dna-microarray-data)
- [Complementary DNA](http://en.wikipedia.org/wiki/Complementary_DNA)
More example data:
- [MicroArray Genome Imaging & Clustering Tool](http://www.bio.davidson.edu/projects/MAGIC/MAGIC.html) by Laurie Heyer & team, Davidson College
```{code-cell} ipython3
import matplotlib.pyplot as plt
import numpy as np
from skimage import io, img_as_float
```
```{code-cell} ipython3
microarray = io.imread('../images/microarray.jpg')
# Scale between zero and one
microarray = img_as_float(microarray)
plt.figure(figsize=(10, 5))
plt.imshow(microarray[:500, :1000], cmap='gray', interpolation='nearest');
```
```{code-cell} ipython3
from skimage import color
f, (ax0, ax1) = plt.subplots(1, 2, figsize=(15, 10))
red = microarray[..., 0]
green = microarray[..., 1]
red_rgb = np.zeros_like(microarray)
red_rgb[..., 0] = red
green_rgb = np.zeros_like(microarray)
green_rgb[..., 1] = green
ax0.imshow(green_rgb, interpolation='nearest')
ax1.imshow(red_rgb, interpolation='nearest')
plt.suptitle('\n\nPseudocolor plots of red and green channels', fontsize=16);
```
```{code-cell} ipython3
from skimage import filters
mask = (green > 0.1)
plt.imshow(mask[:1000, :1000], cmap='gray');
```
```{code-cell} ipython3
z = red.copy()
z /= green
z[~mask] = 0
print(z.min(), z.max())
plt.imshow(z[:500, :500], cmap=plt.cm.gray, vmin=0, vmax=2);
```
## Locating the grid
```{code-cell} ipython3
both = (green + red)
plt.imshow(both, cmap='gray');
```
```{code-cell} ipython3
from skimage import feature
sum_down_columns = both.sum(axis=0)
sum_across_rows = both.sum(axis=1)
dips_columns = feature.peak_local_max(sum_down_columns.max() - sum_down_columns, min_distance=5)
dips_columns = np.sort(dips_columns.ravel())
M = len(dips_columns)
column_distance = np.mean(np.diff(dips_columns))
dips_rows = feature.peak_local_max(sum_across_rows.max() - sum_across_rows, min_distance=5)
dips_rows = np.sort(dips_rows.ravel())
N = len(dips_rows)
row_distance = np.mean(np.diff(dips_rows))
print('Columns are a mean distance of %.2f apart' % column_distance)
print('Rows are a mean distance of %.2f apart' % row_distance)
f, (ax0, ax1) = plt.subplots(1, 2, figsize=(15, 5))
ax0.plot(sum_down_columns)
ax0.scatter(dips_columns, sum_down_columns[dips_columns])
ax0.set_xlim(0, 200)
ax0.set_title('Column gaps')
ax1.plot(sum_across_rows)
ax1.scatter(dips_rows, sum_across_rows[dips_rows])
ax1.set_xlim(0, 200)
ax0.set_title('Row gaps');
```
```{code-cell} ipython3
P, Q = 500, 500
plt.figure(figsize=(15, 10))
plt.imshow(microarray[:P, :Q])
for i in dips_rows[dips_rows < P]:
plt.plot([0, Q], [i, i], 'm')
for j in dips_columns[dips_columns < Q]:
plt.plot([j, j], [0, P], 'm')
plt.axis('image');
```
```{code-cell} ipython3
out = np.zeros(microarray.shape[:2])
M, N = len(dips_rows), len(dips_columns)
for i in range(M - 1):
for j in range(N - 1):
row0, row1 = dips_rows[i], dips_rows[i + 1]
col0, col1 = dips_columns[j], dips_columns[j + 1]
r = microarray[row0:row1, col0:col1, 0]
g = microarray[row0:row1, col0:col1, 1]
ratio = r / g
mask = ~np.isinf(ratio)
mean_ratio = np.mean(ratio[mask])
if np.isnan(mean_ratio):
mean_ratio = 0
out[row0:row1, col0:col1] = mean_ratio
```
```{code-cell} ipython3
f, (ax0, ax1) = plt.subplots(1, 2, figsize=(15, 10))
ax0.imshow(microarray)
ax0.grid(color='magenta', linewidth=1)
ax1.imshow(out, cmap='gray', interpolation='nearest', vmin=0, vmax=3);
ax1.grid(color='magenta', linewidth=1)
```
## Transform the intensity to spot outliers
```{code-cell} ipython3
from skimage import exposure
f, (ax0, ax1) = plt.subplots(1, 2, figsize=(15, 10))
ax0.imshow(microarray)
ax0.grid(color='magenta', linewidth=1)
ax1.imshow(exposure.adjust_log(out, gain=0.4), cmap='gray', interpolation='nearest', vmin=0, vmax=3);
ax1.grid(color='magenta', linewidth=1)
```