Skip to content

Commit 4bdd76a

Browse files
committed
works now also for gzipped files
1 parent 2dd3267 commit 4bdd76a

6 files changed

Lines changed: 165 additions & 82 deletions

File tree

POPSC/src/cif_reader.cpp

Lines changed: 138 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,15 @@ Read the COPYING file for license information.
88
#include <gemmi/cif.hpp>
99
#include <gemmi/mmcif.hpp>
1010
#include <gemmi/model.hpp>
11+
12+
#include <zlib.h>
13+
#include <unistd.h>
14+
1115
#include <cstdlib>
1216
#include <cstring>
17+
#include <cstdio>
1318
#include <string>
19+
#include <exception>
1420

1521
#include "cif_reader.h"
1622

@@ -22,114 +28,165 @@ static char* copy_string(const std::string& x) {
2228
return p;
2329
}
2430

31+
/*___________________________________________________________________________*/
32+
static std::string gunzip_to_temp_file(const char* filename) {
33+
char tmpname[] = "/tmp/pops_mmcif_XXXXXX";
34+
35+
int fd = mkstemp(tmpname);
36+
if (fd < 0)
37+
return "";
38+
39+
gzFile in = gzopen(filename, "rb");
40+
if (!in) {
41+
close(fd);
42+
unlink(tmpname);
43+
return "";
44+
}
45+
46+
char buffer[8192];
47+
int nread;
48+
49+
while ((nread = gzread(in, buffer, sizeof(buffer))) > 0) {
50+
if (write(fd, buffer, nread) != nread) {
51+
gzclose(in);
52+
close(fd);
53+
unlink(tmpname);
54+
return "";
55+
}
56+
}
57+
58+
gzclose(in);
59+
close(fd);
60+
61+
return std::string(tmpname);
62+
}
63+
2564
/*___________________________________________________________________________*/
2665
Structure* read_cif(const char* filename) {
27-
gemmi::cif::Document doc = gemmi::cif::read_file(std::string(filename));
66+
Structure* s = nullptr;
2867

29-
if (doc.blocks.empty())
30-
return nullptr;
68+
try {
69+
gemmi::cif::Document doc;
70+
std::string fname(filename);
3171

32-
gemmi::Structure st = gemmi::make_structure_from_block(doc.blocks[0]);
72+
if (fname.size() >= 3 &&
73+
fname.substr(fname.size() - 3) == ".gz") {
3374

34-
if (st.models.empty())
35-
return nullptr;
75+
std::string tmpfile = gunzip_to_temp_file(filename);
76+
if (tmpfile.empty())
77+
return nullptr;
3678

37-
const gemmi::Model& model = st.models[0];
38-
39-
int n = 0;
40-
for (const gemmi::Chain& chain : model.chains)
41-
for (const gemmi::Residue& res : chain.residues)
42-
n += (int) res.atoms.size();
43-
44-
Structure* s = (Structure*) std::calloc(1, sizeof(Structure));
45-
if (!s) return nullptr;
46-
47-
s->natom = n;
48-
s->xyz = (double*) std::malloc(sizeof(double) * 3 * n);
49-
s->atom_name = (char**) std::calloc(n, sizeof(char*));
50-
s->atom_number = (int*) std::malloc(sizeof(int) * n);
51-
s->altloc = (char*) std::malloc(sizeof(char) * n);
52-
s->res_name = (char**) std::calloc(n, sizeof(char*));
53-
s->res_number = (int*) std::malloc(sizeof(int) * n);
54-
s->ins_code = (char*) std::malloc(sizeof(char) * n);
55-
s->chain_name = (char**) std::calloc(n, sizeof(char*));
56-
s->element = (char**) std::calloc(n, sizeof(char*));
57-
s->record_type = (char*) std::malloc(sizeof(char) * n);
58-
59-
if (!s->xyz || !s->atom_name || !s->atom_number ||
60-
!s->res_name || !s->res_number ||
61-
!s->chain_name) {
62-
free_structure(s);
63-
return nullptr;
64-
}
79+
doc = gemmi::cif::read_file(tmpfile);
80+
unlink(tmpfile.c_str());
6581

66-
int i = 0;
67-
s->chain_number = 0;
82+
} else {
83+
doc = gemmi::cif::read_file(fname);
84+
}
85+
86+
if (doc.blocks.empty())
87+
return nullptr;
88+
89+
gemmi::Structure st =
90+
gemmi::make_structure_from_block(doc.blocks[0]);
91+
92+
if (st.models.empty())
93+
return nullptr;
6894

69-
for (const gemmi::Chain& chain : model.chains) {
70-
s->nresidue += chain.residues.size();
71-
72-
for (const gemmi::Residue& res : chain.residues) {
73-
for (const gemmi::Atom& atom : res.atoms) {
95+
const gemmi::Model& model = st.models[0];
7496

75-
// skip hydrogen (or deuterium) atoms
76-
//if (atom.element.is_hydrogen())
77-
// continue;
97+
int n = 0;
98+
for (const gemmi::Chain& chain : model.chains) {
99+
for (const gemmi::Residue& res : chain.residues) {
100+
n += (int) res.atoms.size();
101+
}
102+
}
78103

79-
// coordinates
80-
s->xyz[3*i + 0] = atom.pos.x;
81-
s->xyz[3*i + 1] = atom.pos.y;
82-
s->xyz[3*i + 2] = atom.pos.z;
104+
s = (Structure*) std::calloc(1, sizeof(Structure));
105+
if (!s)
106+
return nullptr;
107+
108+
s->natom = n;
109+
110+
s->xyz = (double*) std::malloc(sizeof(double) * 3 * n);
111+
s->atom_name = (char**) std::calloc(n, sizeof(char*));
112+
s->atom_number = (int*) std::malloc(sizeof(int) * n);
113+
s->altloc = (char*) std::malloc(sizeof(char) * n);
114+
s->res_name = (char**) std::calloc(n, sizeof(char*));
115+
s->res_number = (int*) std::malloc(sizeof(int) * n);
116+
s->ins_code = (char*) std::malloc(sizeof(char) * n);
117+
s->chain_name = (char**) std::calloc(n, sizeof(char*));
118+
s->element = (char**) std::calloc(n, sizeof(char*));
119+
s->record_type = (char*) std::malloc(sizeof(char) * n);
120+
121+
if (!s->xyz || !s->atom_name || !s->atom_number ||
122+
!s->altloc || !s->res_name || !s->res_number ||
123+
!s->ins_code || !s->chain_name || !s->element ||
124+
!s->record_type) {
125+
free_structure(s);
126+
return nullptr;
127+
}
83128

84-
// atom names
85-
s->atom_name[i] = copy_string(atom.name);
86-
s->atom_number[i] = atom.serial;
87-
s->altloc[i] = atom.altloc;
129+
int i = 0;
130+
s->chain_number = 0;
131+
s->nresidue = 0;
88132

89-
// residue names
90-
s->res_name[i] = copy_string(res.name);
91-
s->res_number[i] = res.seqid.num.has_value() ? res.seqid.num.value : 0;
92-
s->ins_code[i] = res.seqid.icode;
133+
for (const gemmi::Chain& chain : model.chains) {
134+
s->nresidue += (int) chain.residues.size();
93135

94-
// chain names
95-
s->chain_name[i] = copy_string(chain.name);
136+
for (const gemmi::Residue& res : chain.residues) {
137+
for (const gemmi::Atom& atom : res.atoms) {
96138

97-
// element
98-
std::string elem = atom.element.name();
139+
s->xyz[3*i + 0] = atom.pos.x;
140+
s->xyz[3*i + 1] = atom.pos.y;
141+
s->xyz[3*i + 2] = atom.pos.z;
99142

100-
if (elem.empty() || elem == "X") {
101-
elem = atom.name;
102-
}
143+
s->atom_name[i] = copy_string(atom.name);
144+
s->atom_number[i] = atom.serial;
145+
s->altloc[i] = atom.altloc ? atom.altloc : ' ';
103146

104-
s->element[i] = copy_string(elem.c_str());
147+
s->res_name[i] = copy_string(res.name);
148+
s->res_number[i] =
149+
res.seqid.num.has_value() ? res.seqid.num.value : 0;
150+
s->ins_code[i] = res.seqid.icode ? res.seqid.icode : ' ';
105151

106-
// 'A' for ATOM, 'H' for HETATM
107-
s->record_type[i] = res.het_flag;
152+
s->chain_name[i] = copy_string(chain.name);
108153

109-
// check for non-null entries
110-
if (!s->atom_name[i] || !s->res_name[i] || !s->chain_name[i] ||
111-
!s->atom_number[i] || !s->res_number[i]) {
112-
free_structure(s);
113-
return nullptr;
114-
}
115-
if (!s->atom_name[i] || !s->res_name[i] ||
116-
!s->chain_name[i] || !s->element[i]) {
117-
free_structure(s);
118-
return nullptr;
119-
}
154+
std::string elem = atom.element.name();
155+
if (elem.empty() || elem == "X")
156+
elem = atom.name;
120157

121-
++ i;
158+
s->element[i] = copy_string(elem);
159+
160+
s->record_type[i] = res.het_flag;
161+
162+
if (!s->atom_name[i] || !s->res_name[i] ||
163+
!s->chain_name[i] || !s->element[i]) {
164+
free_structure(s);
165+
return nullptr;
166+
}
167+
168+
++i;
169+
}
122170
}
171+
172+
++s->chain_number;
123173
}
124-
++ s->chain_number;
174+
175+
return s;
125176
}
126177

127-
return s;
178+
catch (const std::exception& e) {
179+
fprintf(stderr, "Gemmi CIF read error: %s\n", e.what());
180+
if (s)
181+
free_structure(s);
182+
return nullptr;
183+
}
128184
}
129185

130186
/*___________________________________________________________________________*/
131187
void free_structure(Structure* s) {
132-
if (!s) return;
188+
if (!s)
189+
return;
133190

134191
if (s->atom_name) {
135192
for (int i = 0; i < s->natom; ++i)

POPSC/src/getmmcif.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,9 @@ int map_structure_mmcif(Arg *arg, Argpdb *argpdb, Str *str, Structure *s) {
120120
/* number of chains */
121121
str->nChain = s->chain_number;
122122

123-
printf("nAtom %d , nResidue %d , nChain %d", str->nAtom, str->nResidue, str->nChain);
123+
/* for debigging only
124+
printf("nAtom %d , nResidue %d , nChain %d\n", str->nAtom, str->nResidue, str->nChain);
125+
*/
124126

125127
/*____________________________________________________________________________*/
126128
/* allocate memory for structure */

POPSC/src/pops

5.35 KB
Binary file not shown.

POPSC/tests/test4a.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#! /bin/sh
2+
3+
echo "--------------------------------------------------------------"
4+
echo " test4a.sh "
5+
echo "--------------------------------------------------------------"
6+
7+
../src/pops --mmcif 1F3R.cif --compositionOut --typeOut --topologyOut --atomOut --residueOut --chainOut || exit 1
8+

POPSC/tests/test4b.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#! /bin/sh
2+
3+
echo "--------------------------------------------------------------"
4+
echo " test4a.sh "
5+
echo "--------------------------------------------------------------"
6+
7+
valgrind ../src/pops --mmcif 1F3R.cif.gz --compositionOut --typeOut --topologyOut --atomOut --residueOut --chainOut || exit 1
8+

POPSC/tests/test5a.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#! /bin/sh
2+
3+
echo "--------------------------------------------------------------"
4+
echo " test5a.sh "
5+
echo "--------------------------------------------------------------"
6+
7+
../src/pops --mmcif 5LFF.cif.gz --compositionOut --typeOut --topologyOut --atomOut --residueOut --chainOut || exit 1
8+

0 commit comments

Comments
 (0)