I have the following dataset:

#tax_id Species GeneID  Symbol  description     start_position  end_position    orientation     genomic_accession.version
9606    Homo_sapiens_Linnaeus,_1758     10559   SLC35A1 solute_carrier_family_35_member_A1      87472973        87512335        +       NC_000006.12    EAW48599.1|P78382.1|CAH17780.1|CAH17781.1|CAH65468.1|CAH65469.1|CAH65470.1|BAG57132.1|BAG61410.1|CAH56397.1|AAH17807.1|BAA13522.1|NP_001161870.1|NP_001161870.1|NP_006407.1|NP_006407.1|NP_006407.1
9606    Homo_sapiens_Linnaeus,_1758     57038   RARS2   arginyl-tRNA_synthetase_2,_mitochondrial        87513937        87589986        -       NC_000006.12    EAW48584.1|EAW48585.1|EAW48586.1|EAW48587.1|EAW48588.1|EAW48589.1|EAW48590.1|EAW48591.1|Q5T160.1|AAF22030.1|BAB14608.1|BAG38034.1|AAH10420.1|XP_011534251.1|XP_016866562.1|XP_016866563.1|XP_047275048.1|XP_047275049.1|XP_047275050.1|XP_047275051.1|NP_001305714.1|NP_001305714.1|NP_001337434.1|NP_001337434.1|NP_001337435.1|NP_001337435.1|NP_001337436.1|NP_001337436.1|NP_001337437.1|NP_001337437.1|NP_001337438.1|NP_001337438.1|NP_001337439.1|NP_001337439.1|NP_001337440.1|NP_001337440.1|NP_064716.2|NP_064716.2|NP_064716.2
9606    Homo_sapiens_Linnaeus,_1758     23595   ORC3    origin_recognition_complex_subunit_3    87590134        87677822        +       NC_000006.12    AAT38109.1|EAW48581.1|EAW48582.1|EAW48583.1|Q9UBD5.1|AAD40220.1|AAD18057.1|AAD30282.1|BAD96515.1|BAG63963.1|BAH13891.1|BAG64276.1|BAG64287.1|BAH14282.1|BAH14669.1|CAB45715.1|AAH35494.1|AAH47689.1|CAG33095.1|AAA96313.1|XP_005248761.1|XP_011533953.1|XP_011533954.1|XP_016866121.1|XP_016866122.1|XP_016866123.1|XP_016866124.1|XP_047274504.1|XP_047274505.1|XP_047274506.1|XP_047274507.1|XP_047274508.1|NP_001184188.1|NP_001184188.1|NP_036513.2|NP_036513.2|NP_862820.1|NP_862820.1
9606    Homo_sapiens_Linnaeus,_1758     55122   AKIRIN2 akirin_2        87674859        87702232        -       NC_000006.12    CAH25916.1|EAW48578.1|Q53H80.2|BAA91551.1|BAD96421.1|AAH00764.1|AAH03042.1|AAH05051.1|NP_060534.1|NP_060534.1
9606    Homo_sapiens_Linnaeus,_1758     81833   SPACA1  sperm_acrosome_associated_1     88046900        88066837        +       NC_000006.12    CBH19450.1|EAW48577.1|Q9HBV2.1|AAG31422.1|AAH29488.1|XP_011534462.1|XP_016866824.1|XP_047275341.1|NP_112222.1|NP_112222.1
9606    Homo_sapiens_Linnaeus,_1758     1268    CNR1    cannabinoid_receptor_1  88139863        88167348        -       NC_000006.12    AAB18200.1|AAG37765.1|AAO67710.1|AGS49222.1|EAW48574.1|EAW48575.1|EAW48576.1|P21554.1|AAD34320.1|BAG36631.1|AAV35030.1|AAY21179.1|AAH74811.1|AAH74812.1|AAH95513.1|AAI00969.1|AAI00970.1|AAI00971.1|AAI00972.1|AAY68486.1|AGW25490.1|CAA38699.1|CAA57018.1|CAA57019.1|XP_011533727.1|XP_047274127.1|XP_047274128.1|XP_047274129.1|NP_001153698.1|NP_001153698.1|NP_001153730.1|NP_001153730.1|NP_001153731.1|NP_001153731.1|NP_001352798.1|NP_001352798.1|NP_001352799.1|NP_001352799.1|NP_001352801.1|NP_001352801.1|NP_001352803.1|NP_001352803.1|NP_001357474.1|NP_001357474.1|NP_001357475.1|NP_001357475.1|NP_001357476.1|NP_001357476.1|NP_057167.2|NP_057167.2|NP_149421.2|NP_149421.2
9606    Homo_sapiens_Linnaeus,_1758     8732    RNGTT   RNA_guanylyltransferase_and_5'-phosphatase      88609896        88963617        -       NC_000006.12    EAW48566.1|EAW48567.1|EAW48568.1|EAW48569.1|EAW48570.1|O60942.1|BAA25894.1|BAA25895.1|BAA25896.1|BAA25198.1|BAA25199.1|AAB91559.1|BAG58562.1|BAG61660.1|BAG35320.1|AAH19954.1|CAD97693.1|XP_047275398.1|XP_047275399.1|NP_001273355.1|NP_001273355.1|NP_001273357.1|NP_001273357.1|NP_003791.3|NP_003791.3
9606    Homo_sapiens_Linnaeus,_1758     10957   PNRC1   proline_rich_nuclear_receptor_coactivator_1     89080750        89085159        +       NC_000006.12    EAW48563.1|EAW48564.1|Q12796.1|AAK07554.1|BAG35547.1|AAP76184.1|AAH18112.1|AAH44919.1|AAA85576.1|XP_047274062.1|NP_006804.1|NP_006804.1
9606    Homo_sapiens_Linnaeus,_1758     135293  PM20D2  peptidase_M20_domain_containing_2       89093939        89165564        +       NC_000006.12    EAW48559.1|Q8IYS1.2|BAG63754.1|AAH35036.2|AAH45583.1|CAE45925.1|XP_005248719.1|XP_011533783.1|XP_016865778.1|XP_047274175.1|XP_047274176.1|XP_047274177.1|NP_001010853.1|NP_001010853.1
9606    Homo_sapiens_Linnaeus,_1758     135295  SRSF12  serine_and_arginine_rich_splicing_factor_12     89095958        89118070        -       NC_000006.12    EAW48561.1|EAW48562.1|Q8WXF0.1|AAL57515.1|BAG53879.1|BAG36719.1|AAH21715.1|XP_011533785.1|XP_047274179.1|XP_047274180.1|NP_001363825.1|NP_001363825.1|NP_001363826.1|NP_001363826.1|NP_001363827.1|NP_001363827.1|NP_542781.3|NP_542781.3
9606    Homo_sapiens_Linnaeus,_1758     2569    GABRR1  gamma-aminobutyric_acid_type_A_receptor_subunit_rho1    89177503        89231287        -       NC_000006.12    EAW48558.1|P24046.2|BAG58870.1|BAG61047.1|AAI30345.1|AAA52509.1|XP_016866178.1|NP_001243632.1|NP_001243632.1|NP_001243633.1|NP_001243633.1|NP_001254511.1|NP_001254511.1|NP_002033.2|NP_002033.2
10090   house_mouse     14408   Gabrr1  gamma-aminobutyric_acid_(GABA)_C_receptor,_subunit_rho_1        33132555        33163605        +       NC_000070.7     EDL05489.1|P56475.2|AAB81964.1|XP_006537675.1|XP_006537676.1|NP_032101.3
10090   house_mouse     242377  Pm20d2  peptidase_M20_domain_containing_2       33170400        33189736        -       NC_000070.7     A3KG59.1|EDL05488.1|BAE26351.1|XP_006537963.1|NP_001030039.2
10090   house_mouse     272009  Srsf12  serine_and_arginine-rich_splicing_factor_12     33208990        33233343        +       NC_000070.7     EDL05487.1|Q8C8K3.2|BAC32901.1|XP_036020063.1|NP_001343402.1|NP_001359447.1|NP_001359448.1|NP_808442.2
10090   house_mouse     108767  Pnrc1   proline-rich_nuclear_receptor_coactivator_1     33245422        33248786        -       NC_000070.7     EDL05486.1|BAE35293.1|BAE35724.1|XP_036019461.1|NP_001028397.2
10090   house_mouse     24018   Rngtt   RNA_guanylyltransferase_and_5'-phosphatase      33310299        33502613        +       NC_000070.7     EDL05483.1|EDL05484.1|O55236.1|AAB91558.1|AAB88903.1|BAB22459.1|BAE24914.1|BAE29611.1|BAE30421.1|BAE35555.1|BAE35965.1|BAE41060.1|AAH43657.1|XP_006537952.1|XP_006537953.1|XP_011248317.1|XP_017175678.2|XP_030109377.1|XP_036019988.1|XP_036019989.1|NP_001292202.1|NP_036014.1
10090   house_mouse     12801   Cnr1    cannabinoid_receptor_1_(brain)  33923170        33948830        +       NC_000070.7     AAA64413.1|AAG37743.1|AAS91801.1|CAB42647.1|EDL05482.1|P47746.1|AAD34624.1|BAE24003.1|AAS91800.1|AAH70447.1|AAH79564.1|AXZ96961.1|AXZ96962.1|AAA57202.1|AAA91176.1|XP_006537654.1|XP_006537655.1|XP_006537656.1|XP_006537657.1|XP_006537658.1|XP_030109009.1|XP_036019518.1|XP_036019519.1|NP_001341949.1|NP_001341950.1|NP_001352810.1|NP_031752.1
10090   house_mouse     67652   Spaca1  sperm_acrosome_associated_1     34024871        34050066        -       NC_000070.7     EDL05480.1|EDL05481.1|Q9DA48.1|BAB24447.1|AAH48388.1|AAI47080.1|AAI47081.1|NP_001277372.1|NP_080569.1
10090   house_mouse     433693  Akirin2 akirin_2        34550614        34566929        +       NC_000070.7     B1AXD8.1|EDL05478.1|BAB24265.1|XP_011248362.1|NP_001007590.2
10090   house_mouse     50793   Orc3    origin_recognition_complex,_subunit_3   34566780        34614941        -       NC_000070.7     EDL05475.1|EDL05476.1|EDL05477.1|Q9JK30.1|BAD91665.2|CAB76399.1|BAC32339.1|BAC39695.1|BAE26368.1|AAH61252.1|XP_011248368.1|XP_017175798.1|XP_030109513.1|XP_030109514.1|NP_001153035.1|NP_056639.3
10090   house_mouse     109093  Rars2   arginyl-tRNA_synthetase_2,_mitochondrial        34614940        34660166        +       NC_000070.7     EDL05473.1|EDL05474.1|Q3U186.1|BAE33614.1|AAH24878.1|XP_011248204.1|XP_017175401.1|XP_030108934.1|XP_030108935.1|XP_030108936.1|XP_036019465.1|XP_036019466.1|XP_036019467.1|NP_852071.2
10090   house_mouse     24060   Slc35a1 solute_carrier_family_35_(CMP-sialic_acid_transporter),_member_1        34663256        34688112        -       NC_000070.7     EDL05472.1|Q61420.2|AAH12252.1|CAA95855.1|XP_006537954.1|XP_006537955.1|NP_036025.2
9031    bantam  421816  PM20D2  peptidase_M20_domain_containing_2       75488609        75504694        -       NC_052534.1     XP_040523235.1|XP_040523236.1|XP_419840.1
9031    bantam  421817  PNRC1   proline_rich_nuclear_receptor_coactivator_1     75523709        75525985        -       NC_052534.1     CAG32458.1|NP_001012291.1|NP_001012291.1
9031    bantam  101749651       BORCS6  BLOC-1_related_complex_subunit_6        75525409        75527558        +       NC_052534.1     XP_004940447.2|XP_040522646.1
9031    bantam  421819  RNGTT   RNA_guanylyltransferase_and_5'-phosphatase      75541128        75727281        +       NC_052534.1     XP_004940448.1|XP_040522647.1|XP_040522648.1|XP_040522649.1|XP_040522650.1|XP_040553798.1|XP_040553799.1|XP_419843.2
9031    bantam  101749895       LOC101749895    uncharacterized_LOC101749895    75739266        75818610        +       NC_052534.1     XP_046770394.1|XP_046795747.1|XP_046795748.1
9031    bantam  428633  CNR1    cannabinoid_receptor_1  75866628        75884964        +       NC_052534.1     AAL85890.1|XP_015140122.1|XP_015140124.1|XP_040553215.1|XP_046770283.1|XP_046770284.1|XP_046770285.1|XP_046770286.1|XP_046770287.1|XP_046770288.1|XP_046794949.1|XP_046794950.1|XP_046794951.1|NP_001033741.1|NP_001033741.1
9031    bantam  421820  SPACA1  sperm_acrosome_associated_1     75908333        75927345        -       NC_052534.1     XP_040523367.1|XP_040523368.1|XP_040553800.1|XP_046770396.1|XP_046795749.1|XP_419844.3
9031    bantam  421821  AKIRIN2 akirin_2        76053762        76070568        +       NC_052534.1     ADK26453.1|AID16028.1|AID16029.1|NP_001180524.1|NP_001180524.1
9031    bantam  421822  ORC3    origin_recognition_complex_subunit_3    76075243        76112181        -       NC_052534.1     CAG32127.1|XP_040553104.1|XP_040553106.1|XP_046770044.1|XP_046770045.1|NP_001026253.3|NP_001026253.3|NP_001376213.2|NP_001376213.2|NP_001384958.1|NP_001384958.1
9031    bantam  421823  RARS2   arginyl-tRNA_synthetase_2,_mitochondrial        76112296        76151586        +       NC_052534.1     XP_015140103.2|XP_015140104.2|XP_025004526.2|XP_046770046.1|XP_046770047.1|XP_046770048.1|XP_046770049.1|NP_001264948.2|NP_001264948.2
9031    bantam  395184  SLC35A1 solute_carrier_family_35_member_A1      76145492        76161843        -       NC_052534.1     CAD59551.1|XP_015140083.1|XP_046768888.1|NP_989844.2|NP_989844.2
8355    African_clawed_frog     100653501       gabrr2.S        gamma-aminobutyric_acid_(GABA)_A_receptor,_rho_2_S_homeolog     73105463        73149410        +       NC_054380.1     OCT78274.1|ADQ43748.1|XP_018120934.1
8355    African_clawed_frog     108717563       gabrr1.S        gamma-aminobutyric_acid_(GABA)_A_receptor,_rho_1_S_homeolog     73187195        73211996        +       NC_054380.1     XP_041420587.1
8355    African_clawed_frog     108717974       pm20d2.S        peptidase_M20_domain_containing_2_S_homeolog    73227183        73254288        -       NC_054380.1     OCT78276.1|XP_041420588.1
8355    African_clawed_frog     108717975       LOC108717975    serine/arginine-rich_splicing_factor_12 73289764        73310695        +       NC_054380.1     OCT78279.1|OCT78280.1|OCT78281.1|XP_018120937.1
8355    African_clawed_frog     399383  rngtt.S RNA_guanylyltransferase_and_5'-phosphatase      73324343        73481597        +       NC_054380.1     OCT78282.1|AAF43143.1|AAF43144.1|AAI69624.1|XP_041419902.1|NP_001084232.1
8355    African_clawed_frog     373759  cnr1.S  cannabinoid_receptor_1_(brain)_S_homeolog       73730323        73769283        +       NC_054380.1     OCT78283.1|Q801M1.1|AAM28314.1|AAI69426.1|AAI70488.1|XP_018119706.1
8355    African_clawed_frog     443910  akirin2.S       akirin_2_S_homeolog     74041150        74053282        +       NC_054380.1     OCT78284.1|Q6GQB5.1|AAH72831.1|NP_001085484.1
8355    African_clawed_frog     379084  orc3.S  origin_recognition_complex_subunit_3_S_homeolog 74071479        74127829        -       NC_054380.1     OCT78285.1|AAC35897.1|AAH41312.1|XP_018119715.1|XP_041419826.1|NP_001079397.1
8355    African_clawed_frog     443863  rars2.S arginyl-tRNA_synthetase_2,_mitochondrial_S_homeolog     74127707        74158852        +       NC_054380.1     OCT78289.1|Q6GQJ7.1|AAH72745.1|XP_018119882.1|XP_018119883.1|XP_041419931.1|NP_001085437.1
8355    African_clawed_frog     108717976       slc35a1.S       solute_carrier_family_35_(CMP-sialic_acid_transporter),_member_A1_S_homeolog    74162194        74176297        -       NC_054380.1     OCT78290.1|XP_018120938.1
8355    African_clawed_frog     108717977       LOC108717977    phosphatidylinositol_glycan_anchor_biosynthesis_class_U_protein-like    74176296        74206734        +       NC_054380.1     XP_041420589.1|XP_041420590.1|XP_041420591.1|XP_041420592.1|XP_041420594.1

I am using the gggenes package.

Is it possible to draw this plot so that the genes lie adjacent to each other? enter image description here

Also, I want the genes in the above image to be colored by their symbol (gene name). The species label must be on the left side, with each species forming a separate chromosome diagram, like this: enter image description here

I followed the "gggenes" package manual and produced the plot below with the following code:

# Draw one gene-arrow track per species (one facet each), filling arrows by
# gene symbol. `dummies` and `mycolors` are not defined in this snippet; they
# must already exist in the session (dummies presumably comes from
# make_alignment_dummies() -- confirm).
#
# NOTE: the original paste carried R console continuation prompts ("+" at the
# start of each line). Run as a script those become a unary "+" applied to a
# layer, so they have been removed here.
ggplot(
  cnr1_synteny_representative,
  aes(xmin = START_POSITION, xmax = END_POSITION, y = SPECIES, fill = SYMBOL)
) +
  geom_gene_arrow() +
  # geom_blank() draws nothing; it only contributes its data to the x-range.
  geom_blank(data = dummies) +
  facet_wrap(~ SPECIES, scales = "free", ncol = 1) +
  scale_fill_manual(values = mycolors) +
  theme_genes() +
  theme(legend.position = "none")

enter image description here

Many of the genes overlap, and I want to avoid that. I also want each gene name to appear as a label on its gene arrow, and the arrow direction must follow the orientation value (+ = pointing right, - = pointing left).

These are some good reference images; I would like the plot to closely resemble them. https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-019-3200-z

https://academic.oup.com/femsec/article/94/12/fiy182/5090968

1

There is 1 best solution below

4
On

Can you see if this helps or if you can clarify the question with this reproducible example?

library(tidyverse)
library(data.table)
library(gggenes)

# Reproducible copy of the synteny table, embedded as whitespace-delimited
# text. The literal is parsed by data.table::fread() (columns: tax_id, species,
# geneid, symbol, start_position, end_position, orientation) and the result is
# converted to a tibble. `orientation` holds the strand sign, "+" or "-".
cnr1_synteny_representative <- 
data.table::fread(
  "tax_id  species                 geneid     symbol           start_position  end_position    orientation
  9606    Homo_sapiens_Linnaeus    10559      SLC35A1          87472973        87512335        + 
  9606    Homo_sapiens_Linnaeus    57038      RARS2            87513937        87589986        - 
  9606    Homo_sapiens_Linnaeus    23595      ORC3             87590134        87677822        + 
  9606    Homo_sapiens_Linnaeus    55122      AKIRIN2          87674859        87702232        - 
  9606    Homo_sapiens_Linnaeus    81833      SPACA1           88046900        88066837        + 
  9606    Homo_sapiens_Linnaeus    1268       CNR1             88139863        88167348        - 
  9606    Homo_sapiens_Linnaeus    8732       RNGTT            88609896        88963617        - 
  9606    Homo_sapiens_Linnaeus    10957      PNRC1            89080750        89085159        + 
  9606    Homo_sapiens_Linnaeus    135293     PM20D2           89093939        89165564        + 
  9606    Homo_sapiens_Linnaeus    135295     SRSF12           89095958        89118070        - 
  9606    Homo_sapiens_Linnaeus    2569       GABRR1           89177503        89231287        - 
  10090   house_mouse              14408      Gabrr1           33132555        33163605        + 
  10090   house_mouse              242377     Pm20d2           33170400        33189736        - 
  10090   house_mouse              272009     Srsf12           33208990        33233343        + 
  10090   house_mouse              108767     Pnrc1            33245422        33248786        - 
  10090   house_mouse              24018      Rngtt            33310299        33502613        + 
  10090   house_mouse              12801      Cnr1             33923170        33948830        + 
  10090   house_mouse              67652      Spaca1           34024871        34050066        - 
  10090   house_mouse              433693     Akirin2          34550614        34566929        + 
  10090   house_mouse              50793      Orc3             34566780        34614941        - 
  10090   house_mouse              109093     Rars2            34614940        34660166        + 
  10090   house_mouse              24060      Slc35a1          34663256        34688112        - 
  9031    bantam                   421816     PM20D2           75488609        75504694        - 
  9031    bantam                   421817     PNRC1            75523709        75525985        - 
  9031    bantam                   101749651  BORCS6           75525409        75527558        + 
  9031    bantam                   421819     RNGTT            75541128        75727281        + 
  9031    bantam                   101749895  LOC101749895     75739266        75818610        + 
  9031    bantam                   428633     CNR1             75866628        75884964        + 
  9031    bantam                   421820     SPACA1           75908333        75927345        - 
  9031    bantam                   421821     AKIRIN2          76053762        76070568        + 
  9031    bantam                   421822     ORC3             76075243        76112181        - 
  9031    bantam                   421823     RARS2            76112296        76151586        + 
  9031    bantam                   395184     SLC35A1          76145492        76161843        - 
  8355    African_clawed_frog      100653501  gabrr2.S         73105463        73149410        + 
  8355    African_clawed_frog      108717563  gabrr1.S         73187195        73211996        + 
  8355    African_clawed_frog      108717974  pm20d2.S         73227183        73254288        - 
  8355    African_clawed_frog      108717975  LOC108717975     73289764        73310695        + 
  8355    African_clawed_frog      399383     rngtt.S          73324343        73481597        + 
  8355    African_clawed_frog      373759     cnr1.S           73730323        73769283        + 
  8355    African_clawed_frog      443910     akirin2.S        74041150        74053282        + 
  8355    African_clawed_frog      379084     orc3.S           74071479        74127829        - 
  8355    African_clawed_frog      443863     rars2.S          74127707        74158852        + 
  8355    African_clawed_frog      108717976  slc35a1.S        74162194        74176297        - 
  8355    African_clawed_frog      108717977  LOC108717977     74176296        74206734        + "  
) |> 
  as_tibble()
  

# Build the invisible padding rows that let the species facets line up on the
# RNGTT gene (consumed later by geom_blank()).
#
# The orthologue is spelled differently per species in the table ("RNGTT",
# "Rngtt", "rngtt.S"), while `on` is a single literal id. Matching on the raw
# symbols therefore only produces dummies for the species that use the exact
# spelling "RNGTT". The symbols are normalised (upper-cased, trailing ".S"
# homeolog suffix dropped) on a temporary copy so every species gets a dummy;
# the plotted table itself is left untouched.
dummies <-
  cnr1_synteny_representative |>
  mutate(symbol = sub("\\.S$", "", toupper(symbol))) |>
  make_alignment_dummies(
    aes(xmin = start_position, xmax = end_position, y = species, id = symbol),
    on = "RNGTT"
  )

# First version of the plot: one facet per species at real genomic coordinates,
# every arrow labelled with its symbol and the RNGTT orthologue highlighted.
cnr1_synteny_representative |>
  mutate(
    # Highlight the RNGTT orthologue in every species. Symbols differ in case
    # ("Rngtt") and the frog carries a ".S" suffix ("rngtt.S"), so compare a
    # normalised copy; the original symbol is kept for the label.
    color = sub("\\.S$", "", toupper(symbol)) == "RNGTT",
    # gggenes documents `forward` as a TRUE/FALSE (or 1/0) flag. Encoding "-"
    # as -1 is not a documented "reverse" value (and -1 coerces to TRUE), so
    # turn the strand sign into an explicit logical instead.
    orientation = orientation == "+"
  ) |>
  ggplot(
    aes(
      xmin = start_position,
      xmax = end_position,
      y = species,
      label = symbol
    )
  ) +
  facet_wrap(~ species, scales = "free", ncol = 1) +
  geom_gene_arrow(
    aes(fill = color, forward = orientation),
    arrowhead_height = unit(6, "mm"),
    arrow_body_height = unit(6, "mm"),
    arrowhead_width = unit(2, "mm")
  ) +
  geom_gene_label(align = "left") +
  # geom_blank() draws nothing and has no `fill` parameter (passing one only
  # raises an "unknown parameters" warning); the layer exists solely to extend
  # each facet's x-range with the alignment dummies.
  geom_blank(data = dummies) +
  # Name the palette entries so the colours cannot swap if only one of the two
  # logical values is present.
  scale_fill_manual(values = c("FALSE" = "cyan3", "TRUE" = "orange")) +
  theme_genes() +
  theme(legend.position = "none")

enter image description here


Given your updates, try this:

# Prepare the table for the "genes laid end-to-end" plot.
#
# Step 1 tidies labels: species names become title-cased words, the strand
# sign becomes +1 / -1, and symbols are upper-cased and cut to three letters.
# `color` is then computed on the already-shortened symbol: TRUE when it
# contains an "R" preceded by at least one character (regex ".R").
#
# Step 2 ignores the real coordinates except for ordering and gene length:
# within each tax_id, genes are sorted by start and stacked back to back via a
# running sum of their lengths, then rescaled to 0-1 so every species spans
# the same width. `enough_room` flags arrows wider than 2% of the track, and
# `label_position` is the arrow midpoint.
prep_df <-
  cnr1_synteny_representative |>
  mutate(
    species = str_to_title(str_replace_all(species, "_", " ")),
    orientation = as.integer(paste0(orientation, 1)),
    symbol = substr(toupper(symbol), 1, 3),
    color = str_detect(symbol, ".R")
  ) |>
  # Sorting first and grouping afterwards yields the same row order and the
  # same groups as grouping first (arrange() ignores groups by default).
  arrange(start_position) |>
  group_by(tax_id) |>
  mutate(
    length = end_position - start_position,
    end = cumsum(length),
    start = lag(end, default = 0),
    max = max(end),
    # rescale points
    end_pct = end / max,
    start_pct = start / max,
    pct_diff = end_pct - start_pct,
    enough_room = pct_diff > 0.02,
    label_position = (start_pct + end_pct) / 2 # mean
  ) |>
  ungroup()

# Show the prepared table, as the original pipeline did with a trailing print().
print(prep_df)


 # Second version of the plot: genes drawn back to back on a 0-1 track per
# species, labelled inside the arrow when it is wide enough and with a
# repelled label (plus leader line) otherwise.
prep_df |>
  ggplot(aes(xmin = start_pct, xmax = end_pct, y = species, label = symbol)) +
  geom_gene_arrow(
    # `orientation` is +1 / -1 in prep_df. gggenes documents `forward` as a
    # TRUE/FALSE (or 1/0) flag and -1 coerces to TRUE, so convert the sign to
    # an explicit logical to make "-" genes point left.
    aes(fill = color, forward = orientation > 0),
    color = "white",
    arrowhead_height = unit(6, "mm"),
    arrow_body_height = unit(6, "mm"),
    arrowhead_width = unit(1, "mm")
  ) +
  # Narrow arrows: push the label away vertically and connect it with a segment.
  # The layer filters the data it is handed by ggplot rather than reaching for
  # the global `prep_df`, so the plot stays correct if the input is renamed or
  # pre-filtered.
  ggrepel::geom_text_repel(
    mapping = aes(x = label_position),
    data = \(d) filter(d, !enough_room),
    seed = 1234, direction = "y",
    point.padding = 30, min.segment.length = 0,
    size = 2.8
  ) +
  # Wide arrows: the label fits, so centre it on the arrow.
  geom_text(
    mapping = aes(x = label_position),
    data = \(d) filter(d, enough_room),
    size = 2.8
  ) +
  # Name the palette entries so the colours cannot swap if only one of the two
  # logical values is present.
  scale_fill_manual(values = c("FALSE" = "cyan3", "TRUE" = "orange")) +
  scale_x_continuous(expand = expansion()) +
  labs(x = NULL, y = NULL) +
  theme_genes() +
  theme(
    legend.position = "none",
    # The x axis is a relative 0-1 scale, so its ticks carry no information.
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

enter image description here