Files
neutrino/src/system/ytparser.cpp
Stefan Seyfried 0d9139054a remove "using namespace std" from header files
"using namespace std" in headers is considered bad practice, so move it
either into the respective cpp files or (for files which have lots of
other std::xxx usage anyway) just write it out explicitly.
Looking at the headers and the affected cpp files, one can actually see
why it is bad practice, as it's spreading very far ;-)
2017-10-02 13:43:57 +02:00

721 lines
19 KiB
C++

/*
Copyright (C) 2013 CoolStream International Ltd
License: GPLv2
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation;
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <errno.h>
#include <fstream>
#include <set>
#include <map>
#include <vector>
#include <bitset>
#include <string>
#include <OpenThreads/ScopedLock>
#include "settings.h"
#include "helpers.h"
#include "helpers-json.h"
#include "set_threadname.h"
#include <global.h>
#include <json/json.h>
#include "ytparser.h"
#include "ytcache.h"
#if LIBCURL_VERSION_NUM < 0x071507
#include <curl/types.h>
#endif
/* Value handed to CURLOPT_TIMEOUT for every transfer started from this file. */
#define URL_TIMEOUT 60
/*
 * Supported YouTube format ids ("itags"), terminated by 0. The loops over this
 * table stop at the first zero entry, and cYTVideoInfo::GetUrl() returns the
 * first entry it finds, so the order below is the order of preference.
 */
static int itags[] = { 37 /* 1080p MP4 */, 22 /* 720p MP4 */, 18 /* 270p/360p MP4 */, 0 };
/*
 * Return the URL of this stream.
 * The stored url is handed back unchanged; appending "&signature=" + sig
 * is currently disabled.
 */
std::string cYTVideoUrl::GetUrl()
{
	return url;
}
void cYTVideoInfo::Dump()
{
printf("id: %s\n", id.c_str());
printf("author: %s\n", author.c_str());
printf("title: %s\n", title.c_str());
printf("duration: %d\n", duration);
//printf("description: %s\n", description.c_str());
printf("urls: %d\n", (int)formats.size());
for (yt_urlmap_iterator_t it = formats.begin(); it != formats.end(); ++it) {
printf("format %d type [%s] url %s\n", it->first, it->second.type.c_str(), it->second.GetUrl().c_str());
}
printf("===================================================================\n");
}
/*
 * Pick a stream URL for this video.
 *
 * fmt       in/out, may be NULL. If it points to a non-zero itag, that format
 *           is looked up first. On return it holds the itag whose URL is
 *           returned, or 0 when a mandatory format was not available.
 * mandatory if true and the requested format is missing, return "" instead of
 *           falling back to the preference list.
 *
 * Returns the URL, or an empty string if no suitable format exists.
 */
std::string cYTVideoInfo::GetUrl(int *fmt, bool mandatory)
{
	// Callers may pass NULL; redirect to a local so the writes below are safe.
	int default_fmt = 0;
	if (!fmt)
		fmt = &default_fmt;

	yt_urlmap_iterator_t it;
	// Only honour an explicit request; 0 means "no preference".
	if (*fmt) {
		it = formats.find(*fmt);
		if (it != formats.end())
			return it->second.GetUrl();
		if (mandatory) {
			*fmt = 0;
			return "";
		}
	}

	// Fall back to the first available entry of the preference table.
	for (int *fmtp = itags; *fmtp; fmtp++) {
		it = formats.find(*fmtp);
		if (it != formats.end()) {
			*fmt = *fmtp;
			return it->second.GetUrl();
		}
	}
	return "";
}
/*
 * Set up the parser defaults, create the curl easy handle used for
 * transfers that do not bring their own, and select the API key.
 */
cYTFeedParser::cYTFeedParser()
{
	// parser state
	parsed = false;
	feedmode = -1;

	// defaults
	thumbnail_dir = "/tmp/ytparser";
	tquality = "mqdefault";
	max_results = 25;
	concurrent_downloads = 2;

	// API key: a compile-time id takes precedence over the user setting
#ifdef YOUTUBE_DEV_ID
	key = YOUTUBE_DEV_ID;
#else
	key = g_settings.youtube_dev_id;
#endif

	curl_handle = curl_easy_init();
}
/* Release the curl easy handle owned by this parser. */
cYTFeedParser::~cYTFeedParser()
{
	curl_easy_cleanup(this->curl_handle);
}
/*
 * Write callback for libcurl: append the received chunk to a std::string.
 *
 * ptr   start of the received data (not NUL-terminated)
 * size  size of one element
 * nmemb number of elements
 * data  the std::string* registered as the write target
 *
 * Returns the number of bytes consumed, which must equal size*nmemb or
 * libcurl aborts the transfer.
 */
size_t cYTFeedParser::CurlWriteToString(void *ptr, size_t size, size_t nmemb, void *data)
{
	// The chunk is size*nmemb bytes long; append exactly what is reported
	// back as consumed.
	const size_t total = size * nmemb;
	if (total > 0) {
		std::string *pStr = static_cast<std::string *>(data);
		pStr->append(static_cast<const char *>(ptr), total);
	}
	return total;
}
/*
 * Fetch `url` and append the response body to `answer`.
 *
 * url           address to fetch
 * answer        receives the body (appended via CurlWriteToString)
 * _curl_handle  handle to use; NULL selects the parser's own handle
 *
 * Returns true if the transfer succeeded and `answer` is not empty.
 */
bool cYTFeedParser::getUrl(std::string &url, std::string &answer, CURL *_curl_handle)
{
	if (!_curl_handle)
		_curl_handle = curl_handle;

	// curl_easy_setopt() is variadic: numeric options must be passed as long.
	curl_easy_setopt(_curl_handle, CURLOPT_URL, url.c_str());
	curl_easy_setopt(_curl_handle, CURLOPT_WRITEFUNCTION, &cYTFeedParser::CurlWriteToString);
	curl_easy_setopt(_curl_handle, CURLOPT_FILE, (void *)&answer);
	curl_easy_setopt(_curl_handle, CURLOPT_FAILONERROR, 1L);
	curl_easy_setopt(_curl_handle, CURLOPT_TIMEOUT, (long)URL_TIMEOUT);
	curl_easy_setopt(_curl_handle, CURLOPT_NOSIGNAL, 1L);
	// NOTE(review): peer verification is switched off here; kept as-is to
	// preserve behaviour, but it leaves the transfer open to MITM.
	curl_easy_setopt(_curl_handle, CURLOPT_SSL_VERIFYPEER, 0L);

	if(!g_settings.softupdate_proxyserver.empty()) {
		curl_easy_setopt(_curl_handle, CURLOPT_PROXY, g_settings.softupdate_proxyserver.c_str());
		if(!g_settings.softupdate_proxyusername.empty()) {
			std::string tmp = g_settings.softupdate_proxyusername + ":" + g_settings.softupdate_proxypassword;
			curl_easy_setopt(_curl_handle, CURLOPT_PROXYUSERPWD, tmp.c_str());
		}
	}

	char cerror[CURL_ERROR_SIZE] = {0};
	curl_easy_setopt(_curl_handle, CURLOPT_ERRORBUFFER, cerror);

	printf("try to get [%s] ...\n", url.c_str());
	CURLcode httpres = curl_easy_perform(_curl_handle);

	// cerror lives on this stack frame; unregister it so the handle does not
	// keep a dangling pointer after we return.
	curl_easy_setopt(_curl_handle, CURLOPT_ERRORBUFFER, NULL);

	printf("http: res %d size %d\n", httpres, (int)answer.size());
	if (httpres != 0 || answer.empty()) {
		printf("error: %s\n", cerror);
		return false;
	}
	return true;
}
/*
 * Download `url` into the file `file`.
 *
 * url           address to fetch
 * file          destination path; created/truncated, removed again on failure
 * _curl_handle  handle to use; NULL selects the parser's own handle
 *
 * Returns true if the transfer succeeded.
 */
bool cYTFeedParser::DownloadUrl(std::string &url, std::string &file, CURL *_curl_handle)
{
	if (!_curl_handle)
		_curl_handle = curl_handle;

	FILE * fp = fopen(file.c_str(), "wb");
	if (fp == NULL) {
		perror(file.c_str());
		return false;
	}

	// curl_easy_setopt() is variadic: numeric options must be passed as long.
	curl_easy_setopt(_curl_handle, CURLOPT_URL, url.c_str());
	// NULL restores libcurl's default writer, which fwrite()s to CURLOPT_FILE.
	curl_easy_setopt(_curl_handle, CURLOPT_WRITEFUNCTION, NULL);
	curl_easy_setopt(_curl_handle, CURLOPT_FILE, fp);
	curl_easy_setopt(_curl_handle, CURLOPT_FAILONERROR, 1L);
	curl_easy_setopt(_curl_handle, CURLOPT_TIMEOUT, (long)URL_TIMEOUT);
	curl_easy_setopt(_curl_handle, CURLOPT_NOSIGNAL, 1L);
	// NOTE(review): peer verification is switched off here; kept as-is to
	// preserve behaviour, but it leaves the transfer open to MITM.
	curl_easy_setopt(_curl_handle, CURLOPT_SSL_VERIFYPEER, 0L);

	if(!g_settings.softupdate_proxyserver.empty()) {
		curl_easy_setopt(_curl_handle, CURLOPT_PROXY, g_settings.softupdate_proxyserver.c_str());
		if(!g_settings.softupdate_proxyusername.empty()) {
			std::string tmp = g_settings.softupdate_proxyusername + ":" + g_settings.softupdate_proxypassword;
			curl_easy_setopt(_curl_handle, CURLOPT_PROXYUSERPWD, tmp.c_str());
		}
	}

	char cerror[CURL_ERROR_SIZE] = {0};
	curl_easy_setopt(_curl_handle, CURLOPT_ERRORBUFFER, cerror);

	printf("try to get [%s] ...\n", url.c_str());
	CURLcode httpres = curl_easy_perform(_curl_handle);

	// Initialised so the log line below is well-defined even if getinfo fails.
	double dsize = 0;
	curl_easy_getinfo(_curl_handle, CURLINFO_SIZE_DOWNLOAD, &dsize);

	// cerror and fp are about to go away; do not leave them registered on a
	// handle that outlives this call.
	curl_easy_setopt(_curl_handle, CURLOPT_ERRORBUFFER, NULL);
	fclose(fp);

	printf("http: res %d size %g.\n", httpres, dsize);
	if (httpres != 0) {
		printf("curl error: %s\n", cerror);
		unlink(file.c_str());
		return false;
	}
	return true;
}
/*
 * URL-decode `url` in place using curl_easy_unescape().
 * The string is left untouched if decoding fails.
 */
void cYTFeedParser::decodeUrl(std::string &url)
{
	char *decoded = curl_easy_unescape(curl_handle, url.c_str(), 0, NULL);
	if (decoded != NULL)
		url.assign(decoded);
	curl_free(decoded);
}
/*
 * URL-encode `txt` in place using curl_easy_escape().
 * The string is left untouched if encoding fails.
 */
void cYTFeedParser::encodeUrl(std::string &txt)
{
	char *encoded = curl_easy_escape(curl_handle, txt.c_str(), txt.length());
	if (encoded != NULL)
		txt.assign(encoded);
	curl_free(encoded);
}
/*
 * Split `str`, beginning at offset `start`, at every occurrence of `delim`.
 * `strlist` is cleared first and then receives the pieces in order; the text
 * after the last delimiter (or everything from `start` if there is none) is
 * always appended as the final piece.
 */
void cYTFeedParser::splitString(std::string &str, std::string delim, std::vector<std::string> &strlist, int start)
{
	strlist.clear();
	for (;;) {
		const std::string::size_type hit = str.find(delim, start);
		if (hit == std::string::npos)
			break;
		strlist.push_back(str.substr(start, hit - start));
		start = hit + delim.size();
	}
	strlist.push_back(str.substr(start));
}
/*
 * Split `str` into one key/value pair at the first `delim` found at or after
 * offset `start`, and store it in `strmap` (overwriting an existing key).
 * The key is the text between `start` and the delimiter, the value is
 * everything after the delimiter. Nothing is stored if `delim` is not found.
 */
void cYTFeedParser::splitString(std::string &str, std::string delim, std::map<std::string,std::string> &strmap, int start)
{
	const std::string::size_type end = str.find(delim, start);
	if (end != std::string::npos) {
		// The value begins right behind the delimiter, independent of `start`.
		strmap[str.substr(start, end - start)] = str.substr(end + delim.size());
	}
}
/*
 * Write `str` to the file `name`, replacing any previous content.
 * Returns true if the file could be opened and was written, false otherwise.
 */
bool cYTFeedParser::saveToFile(const char * name, std::string str)
{
	FILE * fp = fopen(name, "w+");
	if (!fp) {
		printf("cYTFeedParser::saveToFile: failed to open %s\n", name);
		return false;
	}
	fprintf(fp, "%s", str.c_str());
	fclose(fp);
	return true;
}
/* Return the result of xmlGetName(node) as a string; empty if it is NULL. */
std::string cYTFeedParser::getXmlName(xmlNodePtr node)
{
	const char *raw = xmlGetName(node);
	if (!raw)
		return std::string();
	return std::string(raw);
}
/* Return the result of xmlGetAttribute(node, attr) as a string; empty if it is NULL. */
std::string cYTFeedParser::getXmlAttr(xmlNodePtr node, const char * attr)
{
	const char *raw = xmlGetAttribute(node, attr);
	if (!raw)
		return std::string();
	return std::string(raw);
}
/* Return the result of xmlGetData(node) as a string; empty if it is NULL. */
std::string cYTFeedParser::getXmlData(xmlNodePtr node)
{
	const char *raw = xmlGetData(node);
	if (!raw)
		return std::string();
	return std::string(raw);
}
/*
 * Parse a feed response and rebuild the `videos` list from it.
 *
 * The content of the file `curfeedfile` is tried first; if that does not
 * parse as JSON, `answer` is parsed instead. For every item with a usable id
 * the snippet fields are copied, the duration is fetched via
 * parseFeedDetailsJSON(), and the entry is appended to `videos`. Afterwards
 * GetVideoUrls() is run and every entry whose `ret` flag is still false is
 * dropped again.
 *
 * Also updates the `next` / `prev` page tokens and the `parsed` flag.
 * Returns true if at least one video remains.
 */
bool cYTFeedParser::parseFeedJSON(std::string &answer)
{
	std::string errMsg = "";
	Json::Value root;

	std::ostringstream ss;
	std::ifstream fh(curfeedfile.c_str(),std::ifstream::in);
	ss << fh.rdbuf();
	std::string filedata = ss.str();

	bool parsedSuccess = parseJsonFromString(filedata, &root, NULL);
	if(!parsedSuccess)
	{
		parsedSuccess = parseJsonFromString(answer, &root, &errMsg);
	}
	if(!parsedSuccess)
	{
		printf("Failed to parse JSON\n");
		printf("%s\n", errMsg.c_str());
		return false;
	}

	next.clear();
	prev.clear();
	//TODO
	total.clear();
	start.clear();

	next = root.get("nextPageToken", "").asString();
	prev = root.get("prevPageToken", "").asString();

	Json::Value elements = root["items"];
	for(unsigned int i=0; i<elements.size();++i)
	{
		OnProgress(i, elements.size(), g_Locale->getText(LOCALE_MOVIEBROWSER_SCAN_FOR_MOVIES));
#ifdef DEBUG_PARSER
		printf("=========================================================\n");
		printf("Element %d in elements\n", i);
		printf("%s\n", elements[i].toStyledString().c_str());
#endif
		// A fresh object per item, so nothing (in particular the id) leaks
		// over from the previous item.
		cYTVideoInfo vinfo;

		// "id" is either an object carrying "videoId" or a plain string.
		if(elements[i]["id"].type() == Json::objectValue) {
			vinfo.id = elements[i]["id"].get("videoId", "").asString();
		}
		else if(elements[i]["id"].type() == Json::stringValue) {
			vinfo.id = elements[i].get("id", "").asString();
		}
		// Without an id the item cannot be used; skip it before spending a
		// details request on it.
		if (vinfo.id.empty())
			continue;

		vinfo.title       = elements[i]["snippet"].get("title", "").asString();
		vinfo.description = elements[i]["snippet"].get("description", "").asString();
		// keep only the first 10 characters of the publishedAt string
		vinfo.published   = elements[i]["snippet"].get("publishedAt", "").asString().substr(0, 10);
		std::string thumbnail = elements[i]["snippet"]["thumbnails"]["default"].get("url", "").asString();
		// save thumbnail "default", if "high" not found
		vinfo.thumbnail   = elements[i]["snippet"]["thumbnails"]["high"].get("url", thumbnail).asString();
		vinfo.author      = elements[i]["snippet"].get("channelTitle", "unkown").asString();
		vinfo.category    = "";

		parseFeedDetailsJSON(&vinfo);
#ifdef DEBUG_PARSER
		printf("prevPageToken: %s\n", prev.c_str());
		printf("nextPageToken: %s\n", next.c_str());
		printf("vinfo.id: %s\n", vinfo.id.c_str());
		printf("vinfo.description: %s\n", vinfo.description.c_str());
		printf("vinfo.published: %s\n", vinfo.published.c_str());
		printf("vinfo.title: %s\n", vinfo.title.c_str());
		printf("vinfo.thumbnail: %s\n", vinfo.thumbnail.c_str());
#endif
		// ret is flipped to true by ParseVideoInfo() once stream urls are known
		vinfo.ret = false;
		videos.push_back(vinfo);
	}

	GetVideoUrls();

	// Drop every video for which no stream url could be resolved.
	std::vector<cYTVideoInfo>::iterator pos = videos.begin();
	while (pos != videos.end())
		if ((*pos).ret)
			++pos;
		else
			pos = videos.erase(pos);

	parsed = !videos.empty();
	return parsed;
}
/*
 * Fetch the contentDetails of one video and store its duration (in seconds)
 * in vinfo->duration.
 *
 * The duration string has the form "PT#H#M#S" where each of the three
 * components may be absent (e.g. "PT1H5S", "PT2M", "PT45S"). Strings that do
 * not start with "PT" leave the duration at 0.
 *
 * Returns false if the request or the JSON parsing failed, true otherwise.
 */
bool cYTFeedParser::parseFeedDetailsJSON(cYTVideoInfo* vinfo)
{
	vinfo->duration = 0;
	// See at https://developers.google.com/youtube/v3/docs/videos
	std::string url = "https://www.googleapis.com/youtube/v3/videos?id=" + vinfo->id + "&part=contentDetails&key=" + key;
	std::string answer;
	if (!getUrl(url, answer))
		return false;

	std::string errMsg = "";
	Json::Value root;
	bool parsedSuccess = parseJsonFromString(answer, &root, &errMsg);
	if (!parsedSuccess) {
		printf("Failed to parse JSON\n");
		printf("%s\n", errMsg.c_str());
		return false;
	}

	Json::Value elements = root["items"];
	std::string duration = elements[0]["contentDetails"].get("duration", "").asString();

	if (duration.compare(0, 2, "PT") == 0) {
		int h=0, m=0, s=0;
		int value = 0;
		bool have_digits = false;
		// Read "<number><unit>" pairs one at a time so that any subset of the
		// H/M/S components is accepted.
		for (std::string::size_type p = 2; p < duration.size(); ++p) {
			const char c = duration[p];
			if (c >= '0' && c <= '9') {
				// more than five digits is implausible; stop before the
				// arithmetic below could overflow
				if (value > 9999)
					break;
				value = value * 10 + (c - '0');
				have_digits = true;
				continue;
			}
			if (!have_digits)
				break;
			if (c == 'H')
				h = value;
			else if (c == 'M')
				m = value;
			else if (c == 'S')
				s = value;
			else
				break;
			value = 0;
			have_digits = false;
		}
		// printf(">>>>> duration: %s, h: %d, m: %d, s: %d\n", duration.c_str(), h, m, s);
		vinfo->duration = h*3600 + m*60 + s;
	}
	return true;
}
/* Return true if `fmt` is one of the itags listed in the zero-terminated itags table. */
bool cYTFeedParser::supportedFormat(int fmt)
{
	const int *entry = itags;
	while (*entry != 0) {
		if (*entry == fmt)
			return true;
		++entry;
	}
	return false;
}
/*
 * Extract the stream urls from a get_video_info response.
 *
 * `answer` is URL-decoded in place. It must contain "token=" and a
 * "url_encoded_fmt_stream_map=" section; that section is split at ',' into
 * streams and each stream at '&' into key=value parameters. A stream is added
 * to vinfo.formats when its itag is supported and both its url and the
 * signature found inside that url are non-empty.
 *
 * Returns true if at least one stream passed those checks.
 */
bool cYTFeedParser::decodeVideoInfo(std::string &answer, cYTVideoInfo &vinfo)
{
	decodeUrl(answer);
#if 0
	std::string infofile = thumbnail_dir;
	infofile += "/";
	infofile += vinfo.id;
	infofile += ".txt";
	saveToFile(infofile.c_str(), answer);
#endif
	if (answer.find("token=") == std::string::npos)
		return false;

	//FIXME check expire
	std::string::size_type fmt = answer.find("url_encoded_fmt_stream_map=");
	if (fmt == std::string::npos)
		return false;

	bool ret = false;
	std::vector<std::string> ulist;
	// skip past the '=' of the key; everything after it is the stream list
	fmt = answer.find("=", fmt);
	splitString(answer, ",", ulist, fmt+1);

	for (unsigned i = 0; i < ulist.size(); i++) {
#if 0 // to decode all params
		decodeUrl(ulist[i]);
		printf("URL: %s\n", ulist[i].c_str());
#endif
		std::vector<std::string> uparams;
		splitString(ulist[i], "&", uparams);
		if (uparams.size() < 3)
			continue;

		std::map<std::string,std::string> smap;
		for (unsigned j = 0; j < uparams.size(); j++) {
			decodeUrl(uparams[j]);
#ifdef DEBUG_PARSER
			printf(" param: %s\n", uparams[j].c_str());
#endif
			splitString(uparams[j], "=", smap);
		}
#ifdef DEBUG_PARSER
		printf("=========================================================\n");
#endif
		// The full url is copied first; the map entry is then cut down to
		// locate the signature value.
		std::string &rest = smap["url"];
		cYTVideoUrl yurl;
		yurl.url = rest;

		std::string::size_type ptr = rest.find("signature=");
		if (ptr != std::string::npos)
		{
			ptr = rest.find("=", ptr);
			rest.erase(0,ptr+1);
			ptr = rest.find("&");
			if (ptr != std::string::npos)
				yurl.sig = rest.substr(0,ptr);
		}

		const int id = atoi(smap["itag"].c_str());
		const bool usable = supportedFormat(id) && !yurl.url.empty() && !yurl.sig.empty();
		if (usable) {
			yurl.quality = smap["quality"];
			yurl.type = smap["type"];
			vinfo.formats.insert(yt_urlmap_pair_t(id, yurl));
			ret = true;
		}
	}
	return ret;
}
/*
 * Fetch the feed at `url` and parse it.
 * Clears the current video list, derives `curfeedfile` from the thumbnail
 * directory and the current feed name, retrieves the feed and passes the
 * response to parseFeedJSON(). Returns false if retrieval or parsing fails.
 */
bool cYTFeedParser::ParseFeed(std::string &url)
{
	videos.clear();

	curfeedfile = thumbnail_dir;
	curfeedfile.append("/");
	curfeedfile.append(curfeed);
	curfeedfile.append(".xml");

	std::string answer;
#ifdef CACHE_FILES
	const bool fetched = DownloadUrl(url, cfile);
#else
	const bool fetched = getUrl(url, answer);
#endif
	if (!fetched)
		return false;

	return parseFeedJSON(answer);
}
/*
 * Build the request url for the given feed mode and parse the result.
 *
 * mode     feed to load. Modes below FEED_LAST request the "mostPopular"
 *          chart; NEXT / PREV page through the previous request; RELATED
 *          needs `vid`; SEARCH needs `search`.
 * search   search text (SEARCH only); URL-encoded here
 * vid      video id (RELATED only)
 * orderby  sort order for SEARCH
 *
 * Returns false if a required argument or page token is empty, otherwise the
 * result of ParseFeed(url).
 */
bool cYTFeedParser::ParseFeed(yt_feed_mode_t mode, std::string search, std::string vid, yt_feed_orderby_t orderby)
{
	std::string url = "https://www.googleapis.com/youtube/v3/search?";
	// NEXT/PREV reuse nextprevurl, which already carries maxResults and key.
	bool append_res = true;

	if (mode < FEED_LAST) {
		switch(mode) {
			//FIXME APIv3: we dont have the parameter "time".
			case MOST_POPULAR:
			default:
				//trailer = "&time=today";
				curfeed = "&chart=mostPopular";
				break;
			case MOST_POPULAR_ALL_TIME:
				curfeed = "&chart=mostPopular";
				break;
		}
		url = "https://www.googleapis.com/youtube/v3/videos?part=snippet";
		if (!region.empty()) {
			url += "&regionCode=";
			url += region;
		}
		url += curfeed;
	}
	else if (mode == NEXT) {
		if (next.empty())
			return false;
		url = nextprevurl;
		url += "&pageToken=";
		url += next;
		append_res = false;
	}
	else if (mode == PREV) {
		if (prev.empty())
			return false;
		url = nextprevurl;
		url += "&pageToken=";
		url += prev;
		append_res = false;
	}
	else if (mode == RELATED) {
		if (vid.empty())
			return false;
		url = "https://www.googleapis.com/youtube/v3/videos/";
		url += vid;
		url += "/related?";
	}
	else if (mode == SEARCH) {
		if (search.empty())
			return false;
		encodeUrl(search);
		url = "https://www.googleapis.com/youtube/v3/search?q=";
		url += search;
		url += "&part=snippet";
		//FIXME locale for "title" and "videoCount"
		const char *orderby_values[] = { "date","relevance","viewCount","rating","title","videoCount"};
		// the mask limits the index to the first four entries
		url += "&order=" + std::string(orderby_values[orderby & 3]);
	}

	feedmode = mode;
	if (append_res) {
		url += "&maxResults=";
		// large enough for any int, and bounded in any case
		char res[16];
		snprintf(res, sizeof(res), "%d", max_results);
		url+= res;
		url += "&key=" + key;
		// remembered as the base for later NEXT/PREV requests
		nextprevurl = url;
	}
	return ParseFeed(url);
}
/*
 * Resolve the stream urls of one video.
 * get_video_info is queried with each "el" variant in turn until
 * decodeVideoInfo() succeeds for one of them. The outcome is stored in
 * vinfo.ret and returned.
 */
bool cYTFeedParser::ParseVideoInfo(cYTVideoInfo &vinfo, CURL *_curl_handle)
{
	static const char *const el_variants[] = {
		"&el=embedded",
		"&el=vevo",
		"&el=detailpage"
	};
	const unsigned variant_count = sizeof(el_variants) / sizeof(el_variants[0]);

	bool ok = false;
	for (unsigned n = 0; !ok && n < variant_count; n++) {
		std::string vurl("http://www.youtube.com/get_video_info?video_id=");
		vurl.append(vinfo.id);
		vurl.append(el_variants[n]);
		vurl.append("&ps=default&eurl=&gl=US&hl=en");
		printf("cYTFeedParser::ParseVideoInfo: get [%s]\n", vurl.c_str());

		std::string answer;
		if (getUrl(vurl, answer, _curl_handle))
			ok = decodeVideoInfo(answer, vinfo);
	}
	vinfo.ret = ok;
	return ok;
}
/*
 * Worker thread for DownloadThumbnails().
 *
 * arg is the cYTFeedParser that started the thread. Each worker repeatedly
 * takes the next index from caller->worker_index (under caller->mutex) and
 * downloads the thumbnail of that video with its own curl handle, until the
 * index runs past the end of the video list.
 *
 * The result is encoded in the returned pointer itself: non-NULL means every
 * download of this worker succeeded, NULL means at least one failed. It must
 * not point at a local variable, because that storage is gone once the
 * thread has terminated.
 */
void *cYTFeedParser::DownloadThumbnailsThread(void *arg)
{
	set_threadname("YT::DownloadThumbnails");
	bool ret = true;
	cYTFeedParser *caller = (cYTFeedParser *)arg;
	CURL *c = curl_easy_init();
	unsigned int i;
	// The lock only guards fetching the index: it is released at the end of
	// the loop body, before the download in the loop condition runs.
	// "|| true" keeps the loop going after a failed download.
	do {
		OpenThreads::ScopedLock<OpenThreads::Mutex> m_lock(caller->mutex);
		i = caller->worker_index++;
	} while (i < caller->videos.size() && ((ret &= caller->DownloadThumbnail(caller->videos[i], c)) || true));
	curl_easy_cleanup(c);
	return ret ? arg : (void *)NULL;
}
/*
 * Make sure a thumbnail file exists for `vinfo`.
 *
 * Looks for <thumbnail_dir>/<id>.jpg, then asks cYTCache for an existing file
 * for any supported itag, and finally downloads vinfo.thumbnail. On success
 * the file name is stored in vinfo.tfile.
 *
 * _curl_handle  handle to use; NULL selects the parser's own handle
 *
 * Returns true if a thumbnail file is available, false otherwise (including
 * when the video has no thumbnail url).
 */
bool cYTFeedParser::DownloadThumbnail(cYTVideoInfo &vinfo, CURL *_curl_handle)
{
	if (!_curl_handle)
		_curl_handle = curl_handle;
	bool found = false;
	if (!vinfo.thumbnail.empty()) {
		std::string fname = thumbnail_dir + "/" + vinfo.id + ".jpg";
		// NOTE(review): access() is called with a std::string here, presumably
		// via an overload from a project header -- confirm.
		found = !access(fname, F_OK);
		if (!found) {
			for (int *fmtp = itags; *fmtp && !found; fmtp++)
				found = cYTCache::getInstance()->getNameIfExists(fname, vinfo.id, *fmtp);
		}
		if (!found)
			found = DownloadUrl(vinfo.thumbnail, fname, _curl_handle);
		if (found)
			vinfo.tfile = fname;
	}
	return found;
}
/*
 * Download the thumbnails of all videos using up to `concurrent_downloads`
 * worker threads.
 *
 * Creates the thumbnail directory if needed. Returns false if the directory
 * cannot be created, if no worker could be started although there is work to
 * do, or if any worker reports a failed download; true otherwise.
 */
bool cYTFeedParser::DownloadThumbnails()
{
	bool ret = true;
	if (mkdir(thumbnail_dir.c_str(), 0755) && errno != EEXIST) {
		perror(thumbnail_dir.c_str());
		return false;
	}

	unsigned int max_threads = concurrent_downloads;
	if (videos.size() < max_threads)
		max_threads = videos.size();

	// Only successfully created threads are recorded, so only valid ids are
	// joined below.
	std::vector<pthread_t> ta;
	ta.reserve(max_threads);
	worker_index = 0;
	for (unsigned i = 0; i < max_threads; i++) {
		pthread_t tid;
		if (pthread_create(&tid, NULL, cYTFeedParser::DownloadThumbnailsThread, this) == 0)
			ta.push_back(tid);
		else
			printf("cYTFeedParser::DownloadThumbnails: pthread_create failed\n");
	}
	// The remaining workers drain the whole queue; only a complete failure to
	// start any worker leaves the work undone.
	if (ta.empty() && max_threads > 0)
		ret = false;

	for (unsigned i = 0; i < ta.size(); i++) {
		void *r = NULL;
		// non-NULL result == this worker had no failed download
		if (pthread_join(ta[i], &r) != 0 || r == NULL)
			ret = false;
	}
	return ret;
}
/*
 * Worker thread for GetVideoUrls().
 *
 * arg is the cYTFeedParser that started the thread. Each worker repeatedly
 * takes the next index from caller->worker_index (under caller->mutex) and
 * runs ParseVideoInfo() on that video with its own curl handle, until the
 * index runs past the end of the video list.
 *
 * The result is encoded in the returned pointer itself: non-NULL means at
 * least one video of this worker was resolved, NULL means none was. It must
 * not point at a local variable, because that storage is gone once the
 * thread has terminated.
 */
void *cYTFeedParser::GetVideoUrlsThread(void *arg)
{
	set_threadname("YT::GetVideoUrls");
	int ret = 0;
	cYTFeedParser *caller = (cYTFeedParser *)arg;
	CURL *c = curl_easy_init();
	unsigned int i;
	// The lock only guards fetching the index: it is released at the end of
	// the loop body, before the request in the loop condition runs.
	// "|| true" keeps the loop going after a video that could not be resolved.
	do {
		OpenThreads::ScopedLock<OpenThreads::Mutex> m_lock(caller->mutex);
		i = caller->worker_index++;
	} while (i < caller->videos.size() && ((ret |= caller->ParseVideoInfo(caller->videos[i], c)) || true));
	curl_easy_cleanup(c);
	return ret ? arg : (void *)NULL;
}
/*
 * Resolve the stream urls of all videos using up to `concurrent_downloads`
 * worker threads.
 * Returns true if at least one video could be resolved.
 */
bool cYTFeedParser::GetVideoUrls()
{
	bool ret = false;

	unsigned int max_threads = concurrent_downloads;
	if (videos.size() < max_threads)
		max_threads = videos.size();

	// Only successfully created threads are recorded, so only valid ids are
	// joined below.
	std::vector<pthread_t> ta;
	ta.reserve(max_threads);
	worker_index = 0;
	for (unsigned i = 0; i < max_threads; i++) {
		pthread_t tid;
		if (pthread_create(&tid, NULL, cYTFeedParser::GetVideoUrlsThread, this) == 0)
			ta.push_back(tid);
		else
			printf("cYTFeedParser::GetVideoUrls: pthread_create failed\n");
	}

	for (unsigned i = 0; i < ta.size(); i++) {
		void *r = NULL;
		// non-NULL result == this worker resolved at least one video
		if (pthread_join(ta[i], &r) == 0 && r != NULL)
			ret = true;
	}
	return ret;
}
void cYTFeedParser::Cleanup(bool delete_thumbnails)
{
printf("cYTFeedParser::Cleanup: %d videos\n", (int)videos.size());
if (delete_thumbnails) {
for (unsigned i = 0; i < videos.size(); i++) {
unlink(videos[i].tfile.c_str());
}
}
unlink(curfeedfile.c_str());
videos.clear();
parsed = false;
feedmode = -1;
}
void cYTFeedParser::SetThumbnailDir(std::string &_thumbnail_dir)
{
thumbnail_dir = _thumbnail_dir;
}
void cYTFeedParser::Dump()
{
printf("feed: %d videos\n", (int)videos.size());
for (unsigned i = 0; i < videos.size(); i++)
videos[i].Dump();
}