How can I fit a harmonic regression to this circadian gene expression data?

36 Views Asked by At

I have spent several days trying to fit a single harmonic regression line to gene expression data (expressed as z-scores of logCPM) for 4 genes (ARNTL, PER3, HOXB5 and TSSK6) but either get no output or obviously wrong output (like, join the dots). The data cover these four genes and two conditions: type 2 diabetes (T2D) and normal glucose tolerance (NGT) at 8 time-points (with a few NAs for missing data). I have the points plotted using a simple ggplot + facet wrap as shown below:

list(
  transmute(ccount_sub_z_c, x=biopsy_times, y=ARNTL, z = T2D_status, dataset="ARNTL"),
  transmute(ccount_sub_z_c, x=biopsy_times, y=HOXB5, z = T2D_status, dataset="HOXB5"),
  transmute(ccount_sub_z_c, x=biopsy_times, y=PER3,  z = T2D_status, dataset="PER3"),
  transmute(ccount_sub_z_c, x=biopsy_times, y=TSSK6, z = T2D_status, dataset="TSSK6")
) %>%
bind_rows() %>%
ggplot(aes(x, y, color = z, alpha = 1)) +
geom_point() +
scale_color_manual(values = c("T2D" = "red", "NGT" = "black")) +
scale_x_continuous(breaks=seq(12,54,6)) +
theme_bw() +
labs(x="Time (Hours)",
     y = "Expression (logCPM  Z-Scores)",
     title = "") +
ylim(-3,3) +
facet_wrap(~ dataset, nrow = 2)

Which, gives the following output: Gene expression without regression lines:

enter image description here

And I'm aiming for 8 regression lines (one for each condition in the four genes of interest), looking something like this (here, there are different genes, but the visuals are the same):

enter image description here

The data I am using is here:

structure(list(PER3 = c(-1.8605112, -0.643253, 0.2539088, -0.5386873, 
-1.2880119, -0.377805, -0.08502406, -0.48822174, -2.2136435, 
-0.2608282, 0.08872692, -1.2315711, -1.636151, 0.0472454, 0.1916006, 
-1.4679386, -1.75899236, 0.5121136, 1.3116756, 0.03760634, 0.81405448, 
2.0438138, 1.3734468, -1.8918818, -0.6560474, 0.1578197, -0.5042942, 
-1.2390707, -0.2350202, 0.1394818, -0.39934674, -1.5197885, -0.8475531, 
-0.1520613, -0.7989512, -1.1803705, -0.7326796, -0.23958934, 
-0.5257523, -0.2890226, 0.6118268, 1.573492, 1.18668459, 0.405249, 
0.4896124, 1.429997, 1.1441604, -0.730272, 1.2695509, 0.6676447, 
0.2026659, 0.6318694, 1.1190546, 0.43000372, -1.8120803, -0.551603, 
-0.03170801, -0.7375514, -1.5231275, -0.5208613, -0.4012739, 
-0.86116523, -0.1941759, 0.3695457, 1.2996387, 1.0620178, 0.1340747, 
0.4752196, 1.1308677, 0.9466533, -1.3505621, 0.608225, 1.338756, 
1.22216917, 0.1526674, 1.2736077, 1.5215811, -0.9565514, 0.5318385, 
1.655895, 1.07594347, 0.03574761, 0.07587869, 1.4056785, 1.22197917, 
-1.953449, 0.0881295, 1.5511591, 1.1316187, -0.09191809, 0.1975049, 
1.320653, 0.963156), ARNTL = c(1.5808924, -0.0613, -1.0718681, 
0.2885102, 0.5560341, -0.5163479, -1.48425979, -0.07755096, 2.1311676, 
-0.231982, -1.02101403, 0.491655, 0.6984799, -1.2672634, -1.6992463, 
0.502801, 2.16329002, 0.5149568, 0.2891506, 1.20507696, -0.41845329, 
-2.935687, -0.3731248, 1.3656585, -0.1816691, -1.0159735, 0.3032471, 
1.0764242, -0.4461763, -1.2973028, -0.04255402, 1.0736566, -0.3957362, 
-0.72404733, 0.271964, 0.4570335, -0.7667532, -0.27268493, -0.1190917, 
1.7808427, 0.2662836, -0.9458592, -0.03428096, 0.8894374, 0.2644422, 
-0.7619612, -0.2535159, 1.4051226, -0.9736867, 0.1230401, 0.7439091, 
-0.1840971, -0.6272277, 0.07036623, 1.3104439, -0.2012947, -1.12672665, 
0.2672421, 0.4422906, -0.4312326, -0.6354239, -0.01453161, 1.2244261, 
0.6492846, -0.7890935, -0.4734071, 0.7630408, 0.5659685, -0.9209492, 
-0.4773658, 1.6574408, 0.7269578, -1.377552, -0.11156919, 0.9997797, 
-0.7276875, -0.9966026, 2.6460804, 0.2153202, -1.75762, -0.08113192, 
1.32161252, 0.10738718, -1.4766658, -0.75898435, 3.168538, 0.7282678, 
-1.808219, -0.4555415, 1.35589516, 0.02938852, -1.757335, -0.73694
), HOXB5 = c(1.4568738, 0.714593, 0.9243855, 1.2308207, 1.1443759, 
0.3766382, 0.84119079, 1.09084787, 0.8158154, 0.8158154, 0.81581538, 
0.8158154, 0.8158154, 0.8158154, 0.8158154, 0.8158154, -0.33672483, 
-1.3974578, -0.409367, 0.19832822, -1.39745778, -0.6021409, -1.3974578, 
1.1060134, 0.7700247, 1.046056, 1.2857231, 1.1287084, 0.9102424, 
0.8411106, 1.15925851, 0.6089302, 0.7393691, 1.20059741, 0.6114121, 
1.3532902, 1.2665713, 0.97514621, 1.0631887, -0.9166216, -0.9641054, 
-0.4003292, -0.62283725, -1.3244151, -0.906968, -0.9509605, -0.4003406, 
-0.3591125, -0.4137637, -0.8713421, -0.5983689, -0.77559, -1.7140872, 
-1.61575064, 1.1305316, 1.2007011, 0.65537555, 0.7312509, 0.9225818, 
1.0607144, 1.0448542, 0.77013105, -0.5359571, -0.5237443, -0.845499, 
-0.3843733, -0.6883206, -0.6358225, -1.7289606, -0.9479487, 0.1499593, 
0.4113389, -1.793917, 0.02580419, -0.1342978, -1.793917, -0.1649238, 
-0.8435722, -0.6536001, -1.136453, -1.87020757, -0.54069362, 
-0.60063957, -0.8853958, -0.75695719, -1.368387, -1.368387, -1.368387, 
-1.368387, -0.03351324, -1.36838705, -1.368387, -1.368387), TSSK6 = c(0.5441955, 
0.5621567, -0.5554598, 0.1296241, 0.4747992, 0.4804592, 0.67062983, 
0.29098696, 0.1120989, 0.169753, 0.160153, -0.1523376, 1.0085176, 
1.443607, -2.0145702, 0.4863663, 0.07379665, 0.343806, -0.9122589, 
-0.28526949, -0.04270529, 0.2978247, -0.4295654, 0.1067506, 0.1302583, 
0.3170126, 0.407078, 0.7630286, 0.2986219, 0.7515884, -0.07890579, 
0.674732, 0.3129959, 0.01501013, -0.1613294, 0.9406349, 0.4614066, 
-0.06030768, 0.3996212, 0.3247237, 0.2392465, 0.5360124, 0.21578854, 
0.7464346, 0.1040543, 0.9646754, 0.327108, -0.1755749, 0.1226724, 
0.8514533, -0.9133842, -0.3393731, 0.1397075, -0.19217393, 0.3962837, 
0.8612362, 0.49062343, 0.6629867, 0.8210244, 0.5080453, 0.151316, 
0.28387631, -0.4018196, -1.416295, 0.5599174, -0.1066756, -0.8154172, 
0.1080105, 0.1307904, -0.2856936, -6.1879637, -0.1025641, 2.260694, 
0.32522721, -0.6769221, -1.4405708, 0.1798854, -0.3373844, 0.2460028, 
0.330201, 0.23547786, -0.27717771, -0.14004214, 0.4019636, 0.06675485, 
-3.058042, -6.5067671, 0.6971473, 0.2645167, 0.99352754, 0.47141921, 
1.416794, 0.2454339), T2D_status = c("T2D", "T2D", "T2D", "T2D", 
"T2D", "T2D", "T2D", "T2D", "T2D", "T2D", "T2D", "T2D", "T2D", 
"T2D", "T2D", "T2D", "NGT", "NGT", "NGT", "NGT", "NGT", "NGT", 
"NGT", "T2D", "T2D", "T2D", "T2D", "T2D", "T2D", "T2D", "T2D", 
"T2D", "T2D", "T2D", "T2D", "T2D", "T2D", "T2D", "T2D", "NGT", 
"NGT", "NGT", "NGT", "NGT", "NGT", "NGT", "NGT", "NGT", "NGT", 
"NGT", "NGT", "NGT", "NGT", "NGT", "T2D", "T2D", "T2D", "T2D", 
"T2D", "T2D", "T2D", "T2D", "NGT", "NGT", "NGT", "NGT", "NGT", 
"NGT", "NGT", "NGT", "NGT", "NGT", "NGT", "NGT", "NGT", "NGT", 
"NGT", "NGT", "NGT", "NGT", "NGT", "NGT", "NGT", "NGT", "NGT", 
"NGT", "NGT", "NGT", "NGT", "NGT", "NGT", "NGT", "NGT"), participant_number = c("5", 
"5", "5", "5", "5", "5", "5", "5", "6", "6", "6", "6", "6", "6", 
"6", "6", "1", "1", "1", "1", "1", "1", "1", "2", "2", "2", "2", 
"2", "2", "2", "2", "3", "3", "3", "3", "3", "3", "3", "3", "4", 
"4", "4", "4", "4", "4", "4", "4", "12", "12", "12", "12", "12", 
"12", "12", "7", "7", "7", "7", "7", "7", "7", "7", "8", "8", 
"8", "8", "8", "8", "8", "8", "9", "9", "9", "9", "9", "9", "9", 
"10", "10", "10", "10", "10", "10", "10", "10", "11", "11", "11", 
"11", "11", "11", "11", "11"), glucose_condition = c("Control", 
"Control", "Control", "Control", "Control", "Control", "Control", 
"Control", "Control", "Control", "Control", "Control", "Control", 
"Control", "Control", "Control", "Control", "Control", "Control", 
"Control", "Control", "Control", "Control", "Control", "Control", 
"Control", "Control", "Control", "Control", "Control", "Control", 
"Control", "Control", "Control", "Control", "Control", "Control", 
"Control", "Control", "Control", "Control", "Control", "Control", 
"Control", "Control", "Control", "Control", "Control", "Control", 
"Control", "Control", "Control", "Control", "Control", "Control", 
"Control", "Control", "Control", "Control", "Control", "Control", 
"Control", "Control", "Control", "Control", "Control", "Control", 
"Control", "Control", "Control", "Control", "Control", "Control", 
"Control", "Control", "Control", "Control", "Control", "Control", 
"Control", "Control", "Control", "Control", "Control", "Control", 
"Control", "Control", "Control", "Control", "Control", "Control", 
"Control", "Control"), biopsy_times = c(12, 18, 24, 30, 36, 42, 
48, 54, 12, 18, 24, 30, 36, 42, 48, 54, 12, 18, 30, 36, 42, 48, 
54, 12, 18, 24, 30, 36, 42, 48, 54, 12, 18, 24, 30, 36, 42, 48, 
54, 12, 18, 24, 30, 36, 42, 48, 54, 12, 24, 30, 36, 42, 48, 54, 
12, 18, 24, 30, 36, 42, 48, 54, 12, 18, 24, 30, 36, 42, 48, 54, 
12, 18, 24, 30, 36, 48, 54, 12, 18, 24, 30, 36, 42, 48, 54, 12, 
18, 24, 30, 36, 42, 48, 54)), row.names = c("T2D_CTL_12h_i05", 
"T2D_CTL_18h_i05", "T2D_CTL_24h_i05", "T2D_CTL_30h_i05", "T2D_CTL_36h_i05", 
"T2D_CTL_42h_i05", "T2D_CTL_48h_i05", "T2D_CTL_54h_i05", "T2D_CTL_12h_i06", 
"T2D_CTL_18h_i06", "T2D_CTL_24h_i06", "T2D_CTL_30h_i06", "T2D_CTL_36h_i06", 
"T2D_CTL_42h_i06", "T2D_CTL_48h_i06", "T2D_CTL_54h_i06", "NGT_CTL_12h_i01", 
"NGT_CTL_18h_i01", "NGT_CTL_30h_i01", "NGT_CTL_36h_i01", "NGT_CTL_42h_i01", 
"NGT_CTL_48h_i01", "NGT_CTL_54h_i01", "T2D_CTL_12h_i02", "T2D_CTL_18h_i02", 
"T2D_CTL_24h_i02", "T2D_CTL_30h_i02", "T2D_CTL_36h_i02", "T2D_CTL_42h_i02", 
"T2D_CTL_48h_i02", "T2D_CTL_54h_i02", "T2D_CTL_12h_i03", "T2D_CTL_18h_i03", 
"T2D_CTL_24h_i03", "T2D_CTL_30h_i03", "T2D_CTL_36h_i03", "T2D_CTL_42h_i03", 
"T2D_CTL_48h_i03", "T2D_CTL_54h_i03", "NGT_CTL_12h_i04", "NGT_CTL_18h_i04", 
"NGT_CTL_24h_i04", "NGT_CTL_30h_i04", "NGT_CTL_36h_i04", "NGT_CTL_42h_i04", 
"NGT_CTL_48h_i04", "NGT_CTL_54h_i04", "NGT_CTL_12h_i12", "NGT_CTL_24h_i12", 
"NGT_CTL_30h_i12", "NGT_CTL_36h_i12", "NGT_CTL_42h_i12", "NGT_CTL_48h_i12", 
"NGT_CTL_54h_i12", "T2D_CTL_12h_i07", "T2D_CTL_18h_i07", "T2D_CTL_24h_i07", 
"T2D_CTL_30h_i07", "T2D_CTL_36h_i07", "T2D_CTL_42h_i07", "T2D_CTL_48h_i07", 
"T2D_CTL_54h_i07", "NGT_CTL_12h_i08", "NGT_CTL_18h_i08", "NGT_CTL_24h_i08", 
"NGT_CTL_30h_i08", "NGT_CTL_36h_i08", "NGT_CTL_42h_i08", "NGT_CTL_48h_i08", 
"NGT_CTL_54h_i08", "NGT_CTL_12h_i09", "NGT_CTL_18h_i09", "NGT_CTL_24h_i09", 
"NGT_CTL_30h_i09", "NGT_CTL_36h_i09", "NGT_CTL_48h_i09", "NGT_CTL_54h_i09", 
"NGT_CTL_12h_i10", "NGT_CTL_18h_i10", "NGT_CTL_24h_i10", "NGT_CTL_30h_i10", 
"NGT_CTL_36h_i10", "NGT_CTL_42h_i10", "NGT_CTL_48h_i10", "NGT_CTL_54h_i10", 
"NGT_CTL_12h_i11", "NGT_CTL_18h_i11", "NGT_CTL_24h_i11", "NGT_CTL_30h_i11", 
"NGT_CTL_36h_i11", "NGT_CTL_42h_i11", "NGT_CTL_48h_i11", "NGT_CTL_54h_i11"
), class = "data.frame")

I have tried HarmonicRegression() (now deprecated) and hand-coding the regression, but I have to cut and paste the outputs and the result is poor. I need to submit the code used to produce my graphics, so this kludge won't fly. Any help will be so much appreciated!!! Thank you!!!

0

There are 0 best solutions below