/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*

  An implementation for an NGLayout-style content sink that knows how
  to build an RDF content model from XML-serialized RDF.

  For more information on the RDF/XML syntax,
  see http://www.w3.org/TR/REC-rdf-syntax/

  This code is based on the final W3C Recommendation,
  http://www.w3.org/TR/1999/REC-rdf-syntax-19990222.

  Open Issues ------------------

  1) factoring code with nsXMLContentSink - There's some amount of
     common code between this and the HTML content sink. This will
     increase as we support more and more HTML elements. How can code
     from XML/HTML be factored?

  2) We don't support the `parseType' attribute on the Description
     tag; therefore, it is impossible to "inline" raw XML in this
     implementation.

  3) We don't build the reifications at parse time due to the
     footprint overhead it would incur for large RDF documents. (It
     may be possible to attach a "reification" wrapper datasource that
     would present this information at query-time.) Because of this,
     the `bagID' attribute is not processed correctly.

  4) No attempt is made to `resolve URIs' to a canonical form (the
     specification hints that an implementation should do this). This
     is omitted for the obvious reason that we can ill afford to
     resolve each URI reference.

*/

#include "nsCOMPtr.h"
#include "nsInterfaceHashtable.h"
#include "nsIContentSink.h"
#include "nsIRDFContainer.h"
#include "nsIRDFContainerUtils.h"
#include "nsIRDFContentSink.h"
#include "nsIRDFNode.h"
#include "nsIRDFService.h"
#include "nsIRDFXMLSink.h"
#include "nsIServiceManager.h"
#include "nsIURL.h"
#include "nsIXMLContentSink.h"
#include "nsRDFCID.h"
#include "nsTArray.h"
#include "nsXPIDLString.h"
#include "prlog.h"
#include "rdf.h"
#include "rdfutil.h"
#include "nsReadableUtils.h"
#include "nsIExpatSink.h"
#include "nsCRT.h"
#include "nsIAtom.h"
#include "nsStaticAtom.h"
#include "nsIScriptError.h"
#include "nsIDTD.h"

using namespace mozilla;

///////////////////////////////////////////////////////////////////////

// Where the sink currently is in the document. HandleStartElement()
// dispatches on this value to decide how to treat the next open tag.
enum RDFContentSinkState {
    eRDFContentSinkState_InProlog,
    eRDFContentSinkState_InDocumentElement,
    eRDFContentSinkState_InDescriptionElement,
    eRDFContentSinkState_InContainerElement,
    eRDFContentSinkState_InPropertyElement,
    eRDFContentSinkState_InMemberElement,
    eRDFContentSinkState_InEpilog
};

// How ParseText() converts buffered character data into an RDF node.
enum RDFContentSinkParseMode {
    eRDFContentSinkParseMode_Resource,
    eRDFContentSinkParseMode_Literal,
    eRDFContentSinkParseMode_Int,
    eRDFContentSinkParseMode_Date
};

typedef
NS_STDCALL_FUNCPROTO(nsresult,
                     nsContainerTestFn,
                     nsIRDFContainerUtils, IsAlt,
                     (nsIRDFDataSource*, nsIRDFResource*, bool*));

typedef
NS_STDCALL_FUNCPROTO(nsresult,
                     nsMakeContainerFn,
                     nsIRDFContainerUtils, MakeAlt,
                     (nsIRDFDataSource*, nsIRDFResource*, nsIRDFContainer**));

/**
 * Content sink that receives expat callbacks (nsIExpatSink) for an
 * RDF/XML document and turns them into assertions on the datasource
 * handed to SetDataSource().
 */
class RDFContentSinkImpl : public nsIRDFContentSink,
                           public nsIExpatSink
{
public:
    RDFContentSinkImpl();

    // nsISupports
    NS_DECL_ISUPPORTS
    NS_DECL_NSIEXPATSINK

    // nsIContentSink
    NS_IMETHOD WillParse(void);
    NS_IMETHOD WillBuildModel(nsDTDMode aDTDMode);
    NS_IMETHOD DidBuildModel(bool aTerminated);
    NS_IMETHOD WillInterrupt(void);
    NS_IMETHOD WillResume(void);
    NS_IMETHOD SetParser(nsParserBase* aParser);
    virtual void FlushPendingNotifications(mozFlushType aType) { }
    NS_IMETHOD SetDocumentCharset(nsACString& aCharset) { return NS_OK; }
    virtual nsISupports *GetTarget() { return nullptr; }

    // nsIRDFContentSink
    NS_IMETHOD Init(nsIURI* aURL);
    NS_IMETHOD SetDataSource(nsIRDFDataSource* aDataSource);
    NS_IMETHOD GetDataSource(nsIRDFDataSource*& aDataSource);

    // pseudo constants, shared by all instances and reference-counted
    // through gRefCnt (acquired by the first constructor, released by
    // the last destructor)
    static int32_t gRefCnt;
    static nsIRDFService* gRDFService;
    static nsIRDFContainerUtils* gRDFContainerUtils;
    static nsIRDFResource* kRDF_type;
    static nsIRDFResource* kRDF_instanceOf; // XXX should be RDF:type
    static nsIRDFResource* kRDF_Alt;
    static nsIRDFResource* kRDF_Bag;
    static nsIRDFResource* kRDF_Seq;
    static nsIRDFResource* kRDF_nextVal;

    // One static nsIAtom* per entry in nsRDFContentSinkAtomList.h
#define RDF_ATOM(name_, value_) static nsIAtom* name_;
#include "nsRDFContentSinkAtomList.h"
#undef RDF_ATOM

    typedef struct ContainerInfo {
        nsIRDFResource**  mType;
        nsContainerTestFn mTestFn;
        nsMakeContainerFn mMakeFn;
    } ContainerInfo;

protected:
    virtual ~RDFContentSinkImpl();

    // Text management
    void ParseText(nsIRDFNode **aResult);

    nsresult FlushText();
    nsresult AddText(const char16_t* aText, int32_t aLength);

    // RDF-specific parsing
    nsresult OpenRDF(const char16_t* aName);
    nsresult OpenObject(const char16_t* aName ,const char16_t** aAttributes);
    nsresult OpenProperty(const char16_t* aName, const char16_t** aAttributes);
    nsresult OpenMember(const char16_t* aName, const char16_t** aAttributes);
    nsresult OpenValue(const char16_t* aName, const char16_t** aAttributes);

    nsresult GetIdAboutAttribute(const char16_t** aAttributes, nsIRDFResource** aResource, bool* aIsAnonymous = nullptr);
    nsresult GetResourceAttribute(const char16_t** aAttributes, nsIRDFResource** aResource);
    nsresult AddProperties(const char16_t** aAttributes, nsIRDFResource* aSubject, int32_t* aCount = nullptr);
    void SetParseMode(const char16_t **aAttributes);

    // Character-data accumulation buffer: mTextLength code units are in
    // use out of mTextSize allocated (see AddText / FlushText).
    char16_t* mText;
    int32_t mTextLength;
    int32_t mTextSize;

    /**
     * From the set of given attributes, this method extracts the
     * namespace definitions and feeds them to the datasource.
     * These can then be suggested to the serializer to be used again.
     * Hopefully, this will keep namespace definitions intact in a
     * parse - serialize cycle.
     */
    void RegisterNamespaces(const char16_t **aAttributes);

    /**
     * Extracts the localname from aExpatName, the name that the Expat parser
     * passes us.
     * aLocalName will contain the localname in aExpatName.
     * The return value is a dependent string containing just the namespace.
     */
    const nsDependentSubstring SplitExpatName(const char16_t *aExpatName,
                                              nsIAtom **aLocalName);

    enum eContainerType { eBag, eSeq, eAlt };
    nsresult InitContainer(nsIRDFResource* aContainerType, nsIRDFResource* aContainer);
    nsresult ReinitContainer(nsIRDFResource* aContainerType, nsIRDFResource* aContainer);

    // The datasource in which we're assigning assertions
    nsCOMPtr<nsIRDFDataSource> mDataSource;

    // A hash of all the node IDs referred to (rdf:nodeID string ->
    // resource; see GetIdAboutAttribute / GetResourceAttribute)
    nsInterfaceHashtable<nsStringHashKey, nsIRDFResource> mNodeIDMap;

    // The current state of the content sink
    RDFContentSinkState mState;
    RDFContentSinkParseMode mParseMode;

    // content stack management
    int32_t
    PushContext(nsIRDFResource *aContext,
                RDFContentSinkState aState,
                RDFContentSinkParseMode aParseMode);

    nsresult
    PopContext(nsIRDFResource *&aContext,
               RDFContentSinkState &aState,
               RDFContentSinkParseMode &aParseMode);

    nsIRDFResource* GetContextElement(int32_t ancestor = 0);

    struct RDFContextStackElement {
        nsCOMPtr<nsIRDFResource> mResource;
        RDFContentSinkState      mState;
        RDFContentSinkParseMode  mParseMode;
    };

    // NOTE(review): the template arguments here were missing from the
    // text under review. The element type follows from the struct above;
    // the inline capacity (8) must match the allocation in PushContext(),
    // which is outside the reviewed section -- confirm.
    nsAutoTArray<RDFContextStackElement, 8>* mContextStack;

    // Strong reference taken in Init(), released in the destructor.
    nsIURI* mDocumentURL;

private:
#ifdef PR_LOGGING
    static PRLogModuleInfo* gLog;
#endif
};

int32_t RDFContentSinkImpl::gRefCnt = 0;
nsIRDFService* RDFContentSinkImpl::gRDFService;
nsIRDFContainerUtils* RDFContentSinkImpl::gRDFContainerUtils;
nsIRDFResource* RDFContentSinkImpl::kRDF_type;
nsIRDFResource* RDFContentSinkImpl::kRDF_instanceOf;
nsIRDFResource* RDFContentSinkImpl::kRDF_Alt;
nsIRDFResource* RDFContentSinkImpl::kRDF_Bag;
nsIRDFResource* RDFContentSinkImpl::kRDF_Seq;
nsIRDFResource* RDFContentSinkImpl::kRDF_nextVal;

#ifdef PR_LOGGING
PRLogModuleInfo* RDFContentSinkImpl::gLog;
#endif
//////////////////////////////////////////////////////////////////////// #define RDF_ATOM(name_, value_) nsIAtom* RDFContentSinkImpl::name_; #include "nsRDFContentSinkAtomList.h" #undef RDF_ATOM #define RDF_ATOM(name_, value_) NS_STATIC_ATOM_BUFFER(name_##_buffer, value_) #include "nsRDFContentSinkAtomList.h" #undef RDF_ATOM static const nsStaticAtom rdf_atoms[] = { #define RDF_ATOM(name_, value_) NS_STATIC_ATOM(name_##_buffer, &RDFContentSinkImpl::name_), #include "nsRDFContentSinkAtomList.h" #undef RDF_ATOM }; RDFContentSinkImpl::RDFContentSinkImpl() : mText(nullptr), mTextLength(0), mTextSize(0), mState(eRDFContentSinkState_InProlog), mParseMode(eRDFContentSinkParseMode_Literal), mContextStack(nullptr), mDocumentURL(nullptr) { if (gRefCnt++ == 0) { NS_DEFINE_CID(kRDFServiceCID, NS_RDFSERVICE_CID); nsresult rv = CallGetService(kRDFServiceCID, &gRDFService); NS_ASSERTION(NS_SUCCEEDED(rv), "unable to get RDF service"); if (NS_SUCCEEDED(rv)) { rv = gRDFService->GetResource(NS_LITERAL_CSTRING(RDF_NAMESPACE_URI "type"), &kRDF_type); rv = gRDFService->GetResource(NS_LITERAL_CSTRING(RDF_NAMESPACE_URI "instanceOf"), &kRDF_instanceOf); rv = gRDFService->GetResource(NS_LITERAL_CSTRING(RDF_NAMESPACE_URI "Alt"), &kRDF_Alt); rv = gRDFService->GetResource(NS_LITERAL_CSTRING(RDF_NAMESPACE_URI "Bag"), &kRDF_Bag); rv = gRDFService->GetResource(NS_LITERAL_CSTRING(RDF_NAMESPACE_URI "Seq"), &kRDF_Seq); rv = gRDFService->GetResource(NS_LITERAL_CSTRING(RDF_NAMESPACE_URI "nextVal"), &kRDF_nextVal); } NS_DEFINE_CID(kRDFContainerUtilsCID, NS_RDFCONTAINERUTILS_CID); rv = CallGetService(kRDFContainerUtilsCID, &gRDFContainerUtils); NS_RegisterStaticAtoms(rdf_atoms); } #ifdef PR_LOGGING if (! 
gLog) gLog = PR_NewLogModule("nsRDFContentSink"); #endif } RDFContentSinkImpl::~RDFContentSinkImpl() { #ifdef DEBUG_REFS --gInstanceCount; fprintf(stdout, "%d - RDF: RDFContentSinkImpl\n", gInstanceCount); #endif NS_IF_RELEASE(mDocumentURL); if (mContextStack) { PR_LOG(gLog, PR_LOG_WARNING, ("rdfxml: warning! unclosed tag")); // XXX we should never need to do this, but, we'll write the // code all the same. If someone left the content stack dirty, // pop all the elements off the stack and release them. int32_t i = mContextStack->Length(); while (0 < i--) { nsIRDFResource* resource = nullptr; RDFContentSinkState state; RDFContentSinkParseMode parseMode; PopContext(resource, state, parseMode); #ifdef PR_LOGGING // print some fairly useless debugging info // XXX we should save line numbers on the context stack: this'd // be about 1000x more helpful. if (resource) { nsXPIDLCString uri; resource->GetValue(getter_Copies(uri)); PR_LOG(gLog, PR_LOG_NOTICE, ("rdfxml: uri=%s", (const char*) uri)); } #endif NS_IF_RELEASE(resource); } delete mContextStack; } moz_free(mText); if (--gRefCnt == 0) { NS_IF_RELEASE(gRDFService); NS_IF_RELEASE(gRDFContainerUtils); NS_IF_RELEASE(kRDF_type); NS_IF_RELEASE(kRDF_instanceOf); NS_IF_RELEASE(kRDF_Alt); NS_IF_RELEASE(kRDF_Bag); NS_IF_RELEASE(kRDF_Seq); NS_IF_RELEASE(kRDF_nextVal); } } //////////////////////////////////////////////////////////////////////// // nsISupports interface NS_IMPL_ADDREF(RDFContentSinkImpl) NS_IMPL_RELEASE(RDFContentSinkImpl) NS_IMETHODIMP RDFContentSinkImpl::QueryInterface(REFNSIID iid, void** result) { NS_PRECONDITION(result, "null ptr"); if (! 
result) return NS_ERROR_NULL_POINTER; NS_DEFINE_IID(kIContentSinkIID, NS_ICONTENT_SINK_IID); NS_DEFINE_IID(kIExpatSinkIID, NS_IEXPATSINK_IID); NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); NS_DEFINE_IID(kIXMLContentSinkIID, NS_IXMLCONTENT_SINK_IID); NS_DEFINE_IID(kIRDFContentSinkIID, NS_IRDFCONTENTSINK_IID); *result = nullptr; if (iid.Equals(kIRDFContentSinkIID) || iid.Equals(kIXMLContentSinkIID) || iid.Equals(kIContentSinkIID) || iid.Equals(kISupportsIID)) { *result = static_cast(this); AddRef(); return NS_OK; } else if (iid.Equals(kIExpatSinkIID)) { *result = static_cast(this); AddRef(); return NS_OK; } return NS_NOINTERFACE; } NS_IMETHODIMP RDFContentSinkImpl::HandleStartElement(const char16_t *aName, const char16_t **aAtts, uint32_t aAttsCount, uint32_t aLineNumber) { FlushText(); nsresult rv = NS_ERROR_UNEXPECTED; // XXX RegisterNamespaces(aAtts); switch (mState) { case eRDFContentSinkState_InProlog: rv = OpenRDF(aName); break; case eRDFContentSinkState_InDocumentElement: rv = OpenObject(aName,aAtts); break; case eRDFContentSinkState_InDescriptionElement: rv = OpenProperty(aName,aAtts); break; case eRDFContentSinkState_InContainerElement: rv = OpenMember(aName,aAtts); break; case eRDFContentSinkState_InPropertyElement: case eRDFContentSinkState_InMemberElement: rv = OpenValue(aName,aAtts); break; case eRDFContentSinkState_InEpilog: PR_LOG(gLog, PR_LOG_WARNING, ("rdfxml: unexpected content in epilog at line %d", aLineNumber)); break; } return rv; } NS_IMETHODIMP RDFContentSinkImpl::HandleEndElement(const char16_t *aName) { FlushText(); nsIRDFResource* resource; if (NS_FAILED(PopContext(resource, mState, mParseMode))) { // XXX parser didn't catch unmatched tags? 
#ifdef PR_LOGGING if (PR_LOG_TEST(gLog, PR_LOG_WARNING)) { nsAutoString tagStr(aName); char* tagCStr = ToNewCString(tagStr); PR_LogPrint ("rdfxml: extra close tag '%s' at line %d", tagCStr, 0/*XXX fix me */); NS_Free(tagCStr); } #endif return NS_ERROR_UNEXPECTED; // XXX } // If we've just popped a member or property element, _now_ is the // time to add that element to the graph. switch (mState) { case eRDFContentSinkState_InMemberElement: { nsCOMPtr container; NS_NewRDFContainer(getter_AddRefs(container)); container->Init(mDataSource, GetContextElement(1)); container->AppendElement(resource); } break; case eRDFContentSinkState_InPropertyElement: { mDataSource->Assert(GetContextElement(1), GetContextElement(0), resource, true); } break; default: break; } if (mContextStack->IsEmpty()) mState = eRDFContentSinkState_InEpilog; NS_IF_RELEASE(resource); return NS_OK; } NS_IMETHODIMP RDFContentSinkImpl::HandleComment(const char16_t *aName) { return NS_OK; } NS_IMETHODIMP RDFContentSinkImpl::HandleCDataSection(const char16_t *aData, uint32_t aLength) { return aData ? AddText(aData, aLength) : NS_OK; } NS_IMETHODIMP RDFContentSinkImpl::HandleDoctypeDecl(const nsAString & aSubset, const nsAString & aName, const nsAString & aSystemId, const nsAString & aPublicId, nsISupports* aCatalogData) { return NS_OK; } NS_IMETHODIMP RDFContentSinkImpl::HandleCharacterData(const char16_t *aData, uint32_t aLength) { return aData ? 
AddText(aData, aLength) : NS_OK; } NS_IMETHODIMP RDFContentSinkImpl::HandleProcessingInstruction(const char16_t *aTarget, const char16_t *aData) { return NS_OK; } NS_IMETHODIMP RDFContentSinkImpl::HandleXMLDeclaration(const char16_t *aVersion, const char16_t *aEncoding, int32_t aStandalone) { return NS_OK; } NS_IMETHODIMP RDFContentSinkImpl::ReportError(const char16_t* aErrorText, const char16_t* aSourceText, nsIScriptError *aError, bool *_retval) { NS_PRECONDITION(aError && aSourceText && aErrorText, "Check arguments!!!"); // The expat driver should report the error. *_retval = true; return NS_OK; } //////////////////////////////////////////////////////////////////////// // nsIContentSink interface NS_IMETHODIMP RDFContentSinkImpl::WillParse(void) { return NS_OK; } NS_IMETHODIMP RDFContentSinkImpl::WillBuildModel(nsDTDMode) { if (mDataSource) { nsCOMPtr sink = do_QueryInterface(mDataSource); if (sink) return sink->BeginLoad(); } return NS_OK; } NS_IMETHODIMP RDFContentSinkImpl::DidBuildModel(bool aTerminated) { if (mDataSource) { nsCOMPtr sink = do_QueryInterface(mDataSource); if (sink) return sink->EndLoad(); } return NS_OK; } NS_IMETHODIMP RDFContentSinkImpl::WillInterrupt(void) { if (mDataSource) { nsCOMPtr sink = do_QueryInterface(mDataSource); if (sink) return sink->Interrupt(); } return NS_OK; } NS_IMETHODIMP RDFContentSinkImpl::WillResume(void) { if (mDataSource) { nsCOMPtr sink = do_QueryInterface(mDataSource); if (sink) return sink->Resume(); } return NS_OK; } NS_IMETHODIMP RDFContentSinkImpl::SetParser(nsParserBase* aParser) { return NS_OK; } //////////////////////////////////////////////////////////////////////// // nsIRDFContentSink interface NS_IMETHODIMP RDFContentSinkImpl::Init(nsIURI* aURL) { NS_PRECONDITION(aURL != nullptr, "null ptr"); if (! 
aURL) return NS_ERROR_NULL_POINTER; mDocumentURL = aURL; NS_ADDREF(aURL); mState = eRDFContentSinkState_InProlog; return NS_OK; } NS_IMETHODIMP RDFContentSinkImpl::SetDataSource(nsIRDFDataSource* aDataSource) { NS_PRECONDITION(aDataSource != nullptr, "SetDataSource null ptr"); mDataSource = aDataSource; NS_ASSERTION(mDataSource != nullptr,"Couldn't QI RDF DataSource"); return NS_OK; } NS_IMETHODIMP RDFContentSinkImpl::GetDataSource(nsIRDFDataSource*& aDataSource) { aDataSource = mDataSource; NS_IF_ADDREF(aDataSource); return NS_OK; } //////////////////////////////////////////////////////////////////////// // Text buffering static bool rdf_IsDataInBuffer(char16_t* buffer, int32_t length) { for (int32_t i = 0; i < length; ++i) { if (buffer[i] == ' ' || buffer[i] == '\t' || buffer[i] == '\n' || buffer[i] == '\r') continue; return true; } return false; } void RDFContentSinkImpl::ParseText(nsIRDFNode **aResult) { // XXXwaterson wasteful, but we'd need to make a copy anyway to be // able to call nsIRDFService::Get[Resource|Literal|...](). 
nsAutoString value; value.Append(mText, mTextLength); value.Trim(" \t\n\r"); switch (mParseMode) { case eRDFContentSinkParseMode_Literal: { nsIRDFLiteral *result; gRDFService->GetLiteral(value.get(), &result); *aResult = result; } break; case eRDFContentSinkParseMode_Resource: { nsIRDFResource *result; gRDFService->GetUnicodeResource(value, &result); *aResult = result; } break; case eRDFContentSinkParseMode_Int: { nsresult err; int32_t i = value.ToInteger(&err); nsIRDFInt *result; gRDFService->GetIntLiteral(i, &result); *aResult = result; } break; case eRDFContentSinkParseMode_Date: { PRTime t = rdf_ParseDate(nsDependentCString(NS_LossyConvertUTF16toASCII(value).get(), value.Length())); nsIRDFDate *result; gRDFService->GetDateLiteral(t, &result); *aResult = result; } break; default: NS_NOTREACHED("unknown parse type"); break; } } nsresult RDFContentSinkImpl::FlushText() { nsresult rv = NS_OK; if (0 != mTextLength) { if (rdf_IsDataInBuffer(mText, mTextLength)) { // XXX if there's anything but whitespace, then we'll // create a text node. switch (mState) { case eRDFContentSinkState_InMemberElement: { nsCOMPtr node; ParseText(getter_AddRefs(node)); nsCOMPtr container; NS_NewRDFContainer(getter_AddRefs(container)); container->Init(mDataSource, GetContextElement(1)); container->AppendElement(node); } break; case eRDFContentSinkState_InPropertyElement: { nsCOMPtr node; ParseText(getter_AddRefs(node)); mDataSource->Assert(GetContextElement(1), GetContextElement(0), node, true); } break; default: // just ignore it break; } } mTextLength = 0; } return rv; } nsresult RDFContentSinkImpl::AddText(const char16_t* aText, int32_t aLength) { // Create buffer when we first need it if (0 == mTextSize) { mText = (char16_t *) moz_malloc(sizeof(char16_t) * 4096); if (!mText) { return NS_ERROR_OUT_OF_MEMORY; } mTextSize = 4096; } // Copy data from string into our buffer; grow the buffer as needed. 
// It never shrinks, but since the content sink doesn't stick around, // this shouldn't be a bloat issue. int32_t amount = mTextSize - mTextLength; if (amount < aLength) { // Grow the buffer by at least a factor of two to prevent thrashing. // Since PR_REALLOC will leave mText intact if the call fails, // don't clobber mText or mTextSize until the new mem is allocated. int32_t newSize = (2 * mTextSize > (mTextSize + aLength)) ? (2 * mTextSize) : (mTextSize + aLength); char16_t* newText = (char16_t *) moz_realloc(mText, sizeof(char16_t) * newSize); if (!newText) return NS_ERROR_OUT_OF_MEMORY; mTextSize = newSize; mText = newText; } memcpy(&mText[mTextLength], aText, sizeof(char16_t) * aLength); mTextLength += aLength; return NS_OK; } bool rdf_RequiresAbsoluteURI(const nsString& uri) { // cheap shot at figuring out if this requires an absolute url translation return !(StringBeginsWith(uri, NS_LITERAL_STRING("urn:")) || StringBeginsWith(uri, NS_LITERAL_STRING("chrome:"))); } nsresult RDFContentSinkImpl::GetIdAboutAttribute(const char16_t** aAttributes, nsIRDFResource** aResource, bool* aIsAnonymous) { // This corresponds to the dirty work of production [6.5] nsresult rv = NS_OK; nsAutoString nodeID; nsCOMPtr localName; for (; *aAttributes; aAttributes += 2) { const nsDependentSubstring& nameSpaceURI = SplitExpatName(aAttributes[0], getter_AddRefs(localName)); // We'll accept either `ID' or `rdf:ID' (ibid with `about' or // `rdf:about') in the spirit of being liberal towards the // input that we receive. if (!nameSpaceURI.IsEmpty() && !nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI)) { continue; } // XXX you can't specify both, but we'll just pick up the // first thing that was specified and ignore the other. 
if (localName == kAboutAtom) { if (aIsAnonymous) *aIsAnonymous = false; nsAutoString relURI(aAttributes[1]); if (rdf_RequiresAbsoluteURI(relURI)) { nsAutoCString uri; rv = mDocumentURL->Resolve(NS_ConvertUTF16toUTF8(aAttributes[1]), uri); if (NS_FAILED(rv)) return rv; return gRDFService->GetResource(uri, aResource); } return gRDFService->GetResource(NS_ConvertUTF16toUTF8(aAttributes[1]), aResource); } else if (localName == kIdAtom) { if (aIsAnonymous) *aIsAnonymous = false; // In the spirit of leniency, we do not bother trying to // enforce that this be a valid "XML Name" (see // http://www.w3.org/TR/REC-xml#NT-Nmtoken), as per // 6.21. If we wanted to, this would be where to do it. // Construct an in-line resource whose URI is the // document's URI plus the XML name specified in the ID // attribute. nsAutoCString name; nsAutoCString ref('#'); AppendUTF16toUTF8(aAttributes[1], ref); rv = mDocumentURL->Resolve(ref, name); if (NS_FAILED(rv)) return rv; return gRDFService->GetResource(name, aResource); } else if (localName == kNodeIdAtom) { nodeID.Assign(aAttributes[1]); } else if (localName == kAboutEachAtom) { // XXX we don't deal with aboutEach... //PR_LOG(gLog, PR_LOG_WARNING, // ("rdfxml: ignoring aboutEach at line %d", // aNode.GetSourceLineNumber())); } } // Otherwise, we couldn't find anything, so just gensym one... if (aIsAnonymous) *aIsAnonymous = true; // If nodeID is present, check if we already know about it. If we've seen // the nodeID before, use the same resource, otherwise generate a new one. 
if (!nodeID.IsEmpty()) { mNodeIDMap.Get(nodeID,aResource); if (!*aResource) { rv = gRDFService->GetAnonymousResource(aResource); mNodeIDMap.Put(nodeID,*aResource); } } else { rv = gRDFService->GetAnonymousResource(aResource); } return rv; } nsresult RDFContentSinkImpl::GetResourceAttribute(const char16_t** aAttributes, nsIRDFResource** aResource) { nsCOMPtr localName; nsAutoString nodeID; for (; *aAttributes; aAttributes += 2) { const nsDependentSubstring& nameSpaceURI = SplitExpatName(aAttributes[0], getter_AddRefs(localName)); // We'll accept `resource' or `rdf:resource', under the spirit // that we should be liberal towards the input that we // receive. if (!nameSpaceURI.IsEmpty() && !nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI)) { continue; } // XXX you can't specify both, but we'll just pick up the // first thing that was specified and ignore the other. if (localName == kResourceAtom) { // XXX Take the URI and make it fully qualified by // sticking it into the document's URL. This may not be // appropriate... nsAutoString relURI(aAttributes[1]); if (rdf_RequiresAbsoluteURI(relURI)) { nsresult rv; nsAutoCString uri; rv = mDocumentURL->Resolve(NS_ConvertUTF16toUTF8(aAttributes[1]), uri); if (NS_FAILED(rv)) return rv; return gRDFService->GetResource(uri, aResource); } return gRDFService->GetResource(NS_ConvertUTF16toUTF8(aAttributes[1]), aResource); } else if (localName == kNodeIdAtom) { nodeID.Assign(aAttributes[1]); } } // If nodeID is present, check if we already know about it. If we've seen // the nodeID before, use the same resource, otherwise generate a new one. 
if (!nodeID.IsEmpty()) { mNodeIDMap.Get(nodeID,aResource); if (!*aResource) { nsresult rv; rv = gRDFService->GetAnonymousResource(aResource); if (NS_FAILED(rv)) { return rv; } mNodeIDMap.Put(nodeID,*aResource); } return NS_OK; } return NS_ERROR_FAILURE; } nsresult RDFContentSinkImpl::AddProperties(const char16_t** aAttributes, nsIRDFResource* aSubject, int32_t* aCount) { if (aCount) *aCount = 0; nsCOMPtr localName; for (; *aAttributes; aAttributes += 2) { const nsDependentSubstring& nameSpaceURI = SplitExpatName(aAttributes[0], getter_AddRefs(localName)); // skip 'xmlns' directives, these are "meta" information if (nameSpaceURI.EqualsLiteral("http://www.w3.org/2000/xmlns/")) { continue; } // skip `about', `ID', `resource', and 'nodeID' attributes (either with or // without the `rdf:' prefix); these are all "special" and // should've been dealt with by the caller. if (localName == kAboutAtom || localName == kIdAtom || localName == kResourceAtom || localName == kNodeIdAtom) { if (nameSpaceURI.IsEmpty() || nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI)) continue; } // Skip `parseType', `RDF:parseType', and `NC:parseType'. This // is meta-information that will be handled in SetParseMode. 
if (localName == kParseTypeAtom) { if (nameSpaceURI.IsEmpty() || nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI) || nameSpaceURI.EqualsLiteral(NC_NAMESPACE_URI)) { continue; } } NS_ConvertUTF16toUTF8 propertyStr(nameSpaceURI); propertyStr.Append(nsAtomCString(localName)); // Add the assertion to RDF nsCOMPtr property; gRDFService->GetResource(propertyStr, getter_AddRefs(property)); nsCOMPtr target; gRDFService->GetLiteral(aAttributes[1], getter_AddRefs(target)); mDataSource->Assert(aSubject, property, target, true); } return NS_OK; } void RDFContentSinkImpl::SetParseMode(const char16_t **aAttributes) { nsCOMPtr localName; for (; *aAttributes; aAttributes += 2) { const nsDependentSubstring& nameSpaceURI = SplitExpatName(aAttributes[0], getter_AddRefs(localName)); if (localName == kParseTypeAtom) { nsDependentString v(aAttributes[1]); if (nameSpaceURI.IsEmpty() || nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI)) { if (v.EqualsLiteral("Resource")) mParseMode = eRDFContentSinkParseMode_Resource; break; } else if (nameSpaceURI.EqualsLiteral(NC_NAMESPACE_URI)) { if (v.EqualsLiteral("Date")) mParseMode = eRDFContentSinkParseMode_Date; else if (v.EqualsLiteral("Integer")) mParseMode = eRDFContentSinkParseMode_Int; break; } } } } //////////////////////////////////////////////////////////////////////// // RDF-specific routines used to build the model nsresult RDFContentSinkImpl::OpenRDF(const char16_t* aName) { // ensure that we're actually reading RDF by making sure that the // opening tag is , where "rdf:" corresponds to whatever // they've declared the standard RDF namespace to be. 
nsCOMPtr localName; const nsDependentSubstring& nameSpaceURI = SplitExpatName(aName, getter_AddRefs(localName)); if (!nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI) || localName != kRDFAtom) { // PR_LOG(gLog, PR_LOG_ALWAYS, // ("rdfxml: expected RDF:RDF at line %d", // aNode.GetSourceLineNumber())); return NS_ERROR_UNEXPECTED; } PushContext(nullptr, mState, mParseMode); mState = eRDFContentSinkState_InDocumentElement; return NS_OK; } nsresult RDFContentSinkImpl::OpenObject(const char16_t* aName, const char16_t** aAttributes) { // an "object" non-terminal is either a "description", a "typed // node", or a "container", so this change the content sink's // state appropriately. nsCOMPtr localName; const nsDependentSubstring& nameSpaceURI = SplitExpatName(aName, getter_AddRefs(localName)); // Figure out the URI of this object, and create an RDF node for it. nsCOMPtr source; GetIdAboutAttribute(aAttributes, getter_AddRefs(source)); // If there is no `ID' or `about', then there's not much we can do. if (! source) return NS_ERROR_FAILURE; // Push the element onto the context stack PushContext(source, mState, mParseMode); // Now figure out what kind of state transition we need to // make. We'll either be going into a mode where we parse a // description or a container. 
bool isaTypedNode = true; if (nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI)) { isaTypedNode = false; if (localName == kDescriptionAtom) { // it's a description mState = eRDFContentSinkState_InDescriptionElement; } else if (localName == kBagAtom) { // it's a bag container InitContainer(kRDF_Bag, source); mState = eRDFContentSinkState_InContainerElement; } else if (localName == kSeqAtom) { // it's a seq container InitContainer(kRDF_Seq, source); mState = eRDFContentSinkState_InContainerElement; } else if (localName == kAltAtom) { // it's an alt container InitContainer(kRDF_Alt, source); mState = eRDFContentSinkState_InContainerElement; } else { // heh, that's not *in* the RDF namespace: just treat it // like a typed node isaTypedNode = true; } } if (isaTypedNode) { NS_ConvertUTF16toUTF8 typeStr(nameSpaceURI); typeStr.Append(nsAtomCString(localName)); nsCOMPtr type; nsresult rv = gRDFService->GetResource(typeStr, getter_AddRefs(type)); if (NS_FAILED(rv)) return rv; rv = mDataSource->Assert(source, kRDF_type, type, true); if (NS_FAILED(rv)) return rv; mState = eRDFContentSinkState_InDescriptionElement; } AddProperties(aAttributes, source); return NS_OK; } nsresult RDFContentSinkImpl::OpenProperty(const char16_t* aName, const char16_t** aAttributes) { nsresult rv; // an "object" non-terminal is either a "description", a "typed // node", or a "container", so this change the content sink's // state appropriately. nsCOMPtr localName; const nsDependentSubstring& nameSpaceURI = SplitExpatName(aName, getter_AddRefs(localName)); NS_ConvertUTF16toUTF8 propertyStr(nameSpaceURI); propertyStr.Append(nsAtomCString(localName)); nsCOMPtr property; rv = gRDFService->GetResource(propertyStr, getter_AddRefs(property)); if (NS_FAILED(rv)) return rv; // See if they've specified a 'resource' attribute, in which case // they mean *that* to be the object of this property. nsCOMPtr target; GetResourceAttribute(aAttributes, getter_AddRefs(target)); bool isAnonymous = false; if (! 
target) { // See if an 'ID' attribute has been specified, in which case // this corresponds to the fourth form of [6.12]. // XXX strictly speaking, we should reject the RDF/XML as // invalid if they've specified both an 'ID' and a 'resource' // attribute. Bah. // XXX strictly speaking, 'about=' isn't allowed here, but // what the hell. GetIdAboutAttribute(aAttributes, getter_AddRefs(target), &isAnonymous); } if (target) { // They specified an inline resource for the value of this // property. Create an RDF resource for the inline resource // URI, add the properties to it, and attach the inline // resource to its parent. int32_t count; rv = AddProperties(aAttributes, target, &count); NS_ASSERTION(NS_SUCCEEDED(rv), "problem adding properties"); if (NS_FAILED(rv)) return rv; if (count || !isAnonymous) { // If the resource was "anonymous" (i.e., they hadn't // explicitly set an ID or resource attribute), then we'll // only assert this property from the context element *if* // there were properties specified on the anonymous // resource. rv = mDataSource->Assert(GetContextElement(0), property, target, true); if (NS_FAILED(rv)) return rv; } // XXX Technically, we should _not_ fall through here and push // the element onto the stack: this is supposed to be a closed // node. But right now I'm lazy and the code will just Do The // Right Thing so long as the RDF is well-formed. } // Push the element onto the context stack and change state. PushContext(property, mState, mParseMode); mState = eRDFContentSinkState_InPropertyElement; SetParseMode(aAttributes); return NS_OK; } nsresult RDFContentSinkImpl::OpenMember(const char16_t* aName, const char16_t** aAttributes) { // ensure that we're actually reading a member element by making // sure that the opening tag is , where "rdf:" corresponds // to whatever they've declared the standard RDF namespace to be. 
nsresult rv; nsCOMPtr localName; const nsDependentSubstring& nameSpaceURI = SplitExpatName(aName, getter_AddRefs(localName)); if (!nameSpaceURI.EqualsLiteral(RDF_NAMESPACE_URI) || localName != kLiAtom) { PR_LOG(gLog, PR_LOG_ALWAYS, ("rdfxml: expected RDF:li at line %d", -1)); // XXX pass in line number return NS_ERROR_UNEXPECTED; } // The parent element is the container. nsIRDFResource* container = GetContextElement(0); if (! container) return NS_ERROR_NULL_POINTER; nsIRDFResource* resource; if (NS_SUCCEEDED(rv = GetResourceAttribute(aAttributes, &resource))) { // Okay, this node has an RDF:resource="..." attribute. That // means that it's a "referenced item," as covered in [6.29]. nsCOMPtr c; NS_NewRDFContainer(getter_AddRefs(c)); c->Init(mDataSource, container); c->AppendElement(resource); // XXX Technically, we should _not_ fall through here and push // the element onto the stack: this is supposed to be a closed // node. But right now I'm lazy and the code will just Do The // Right Thing so long as the RDF is well-formed. NS_RELEASE(resource); } // Change state. Pushing a null context element is a bit weird, // but the idea is that there really is _no_ context "property". // The contained element will use nsIRDFContainer::AppendElement() to add // the element to the container, which requires only the container // and the element to be added. PushContext(nullptr, mState, mParseMode); mState = eRDFContentSinkState_InMemberElement; SetParseMode(aAttributes); return NS_OK; } nsresult RDFContentSinkImpl::OpenValue(const char16_t* aName, const char16_t** aAttributes) { // a "value" can either be an object or a string: we'll only get // *here* if it's an object, as raw text is added as a leaf. 
return OpenObject(aName,aAttributes); } //////////////////////////////////////////////////////////////////////// // namespace resolution void RDFContentSinkImpl::RegisterNamespaces(const char16_t **aAttributes) { nsCOMPtr sink = do_QueryInterface(mDataSource); if (!sink) { return; } NS_NAMED_LITERAL_STRING(xmlns, "http://www.w3.org/2000/xmlns/"); for (; *aAttributes; aAttributes += 2) { // check the namespace const char16_t* attr = aAttributes[0]; const char16_t* xmlnsP = xmlns.BeginReading(); while (*attr == *xmlnsP) { ++attr; ++xmlnsP; } if (*attr != 0xFFFF || xmlnsP != xmlns.EndReading()) { continue; } // get the localname (or "xmlns" for the default namespace) const char16_t* endLocal = ++attr; while (*endLocal && *endLocal != 0xFFFF) { ++endLocal; } nsDependentSubstring lname(attr, endLocal); nsCOMPtr preferred = do_GetAtom(lname); if (preferred == kXMLNSAtom) { preferred = nullptr; } sink->AddNameSpace(preferred, nsDependentString(aAttributes[1])); } } //////////////////////////////////////////////////////////////////////// // Qualified name resolution const nsDependentSubstring RDFContentSinkImpl::SplitExpatName(const char16_t *aExpatName, nsIAtom **aLocalName) { /** * Expat can send the following: * localName * namespaceURIlocalName * namespaceURIlocalNameprefix * * and we use 0xFFFF for the . * */ const char16_t *uriEnd = aExpatName; const char16_t *nameStart = aExpatName; const char16_t *pos; for (pos = aExpatName; *pos; ++pos) { if (*pos == 0xFFFF) { if (uriEnd != aExpatName) { break; } uriEnd = pos; nameStart = pos + 1; } } const nsDependentSubstring& nameSpaceURI = Substring(aExpatName, uriEnd); *aLocalName = NS_NewAtom(Substring(nameStart, pos)).take(); return nameSpaceURI; } nsresult RDFContentSinkImpl::InitContainer(nsIRDFResource* aContainerType, nsIRDFResource* aContainer) { // Do the right kind of initialization based on the container // 'type' resource, and the state of the container (i.e., 'make' a // new container vs. 
'reinitialize' the container). nsresult rv; static const ContainerInfo gContainerInfo[] = { { &RDFContentSinkImpl::kRDF_Alt, &nsIRDFContainerUtils::IsAlt, &nsIRDFContainerUtils::MakeAlt }, { &RDFContentSinkImpl::kRDF_Bag, &nsIRDFContainerUtils::IsBag, &nsIRDFContainerUtils::MakeBag }, { &RDFContentSinkImpl::kRDF_Seq, &nsIRDFContainerUtils::IsSeq, &nsIRDFContainerUtils::MakeSeq }, { 0, 0, 0 }, }; for (const ContainerInfo* info = gContainerInfo; info->mType != 0; ++info) { if (*info->mType != aContainerType) continue; bool isContainer; rv = (gRDFContainerUtils->*(info->mTestFn))(mDataSource, aContainer, &isContainer); if (isContainer) { rv = ReinitContainer(aContainerType, aContainer); } else { rv = (gRDFContainerUtils->*(info->mMakeFn))(mDataSource, aContainer, nullptr); } return rv; } NS_NOTREACHED("not an RDF container type"); return NS_ERROR_FAILURE; } nsresult RDFContentSinkImpl::ReinitContainer(nsIRDFResource* aContainerType, nsIRDFResource* aContainer) { // Mega-kludge to deal with the fact that Make[Seq|Alt|Bag] is // idempotent, and as such, containers will have state (e.g., // RDF:nextVal) maintained in the graph across loads. This // re-initializes each container's RDF:nextVal to '1', and 'marks' // the container as such. nsresult rv; nsCOMPtr one; rv = gRDFService->GetLiteral(MOZ_UTF16("1"), getter_AddRefs(one)); if (NS_FAILED(rv)) return rv; // Re-initialize the 'nextval' property nsCOMPtr nextval; rv = mDataSource->GetTarget(aContainer, kRDF_nextVal, true, getter_AddRefs(nextval)); if (NS_FAILED(rv)) return rv; rv = mDataSource->Change(aContainer, kRDF_nextVal, nextval, one); if (NS_FAILED(rv)) return rv; // Re-mark as a container. 
XXX should be kRDF_type rv = mDataSource->Assert(aContainer, kRDF_instanceOf, aContainerType, true); NS_ASSERTION(NS_SUCCEEDED(rv), "unable to mark container as such"); if (NS_FAILED(rv)) return rv; return NS_OK; } //////////////////////////////////////////////////////////////////////// // Content stack management nsIRDFResource* RDFContentSinkImpl::GetContextElement(int32_t ancestor /* = 0 */) { if ((nullptr == mContextStack) || (uint32_t(ancestor) >= mContextStack->Length())) { return nullptr; } return mContextStack->ElementAt( mContextStack->Length()-ancestor-1).mResource; } int32_t RDFContentSinkImpl::PushContext(nsIRDFResource *aResource, RDFContentSinkState aState, RDFContentSinkParseMode aParseMode) { if (! mContextStack) { mContextStack = new nsAutoTArray(); if (! mContextStack) return 0; } RDFContextStackElement* e = mContextStack->AppendElement(); if (! e) return mContextStack->Length(); e->mResource = aResource; e->mState = aState; e->mParseMode = aParseMode; return mContextStack->Length(); } nsresult RDFContentSinkImpl::PopContext(nsIRDFResource *&aResource, RDFContentSinkState &aState, RDFContentSinkParseMode &aParseMode) { if ((nullptr == mContextStack) || (mContextStack->IsEmpty())) { return NS_ERROR_NULL_POINTER; } uint32_t i = mContextStack->Length() - 1; RDFContextStackElement &e = mContextStack->ElementAt(i); aResource = e.mResource; NS_IF_ADDREF(aResource); aState = e.mState; aParseMode = e.mParseMode; mContextStack->RemoveElementAt(i); return NS_OK; } //////////////////////////////////////////////////////////////////////// nsresult NS_NewRDFContentSink(nsIRDFContentSink** aResult) { NS_PRECONDITION(aResult != nullptr, "null ptr"); if (! aResult) return NS_ERROR_NULL_POINTER; RDFContentSinkImpl* sink = new RDFContentSinkImpl(); if (! sink) return NS_ERROR_OUT_OF_MEMORY; NS_ADDREF(sink); *aResult = sink; return NS_OK; }